From a19b6fd92c8c75f403cd098ba5fd89ac9fa7952c Mon Sep 17 00:00:00 2001 From: vg0204 Date: Mon, 16 Dec 2024 16:10:02 +0000 Subject: [PATCH 1/7] [AMDGPU] [GlobalIsel] Combine Fmul with Select into ldexp. This combine pattern performs the following transformation: fmul x, select(y, A, B) -> ldexp (x, select i32 (y, a, b)) fmul x, select(y, -A, -B) -> ldexp ((fneg x), select i32 (y, a, b)) where A = 2^a and B = 2^b; a and b are integers. It is a follow-up PR that implements the above combine for GlobalISel, as has already been done for SelectionDAG ISel (PR-111109). --- llvm/lib/Target/AMDGPU/AMDGPUCombine.td | 10 +++ .../Target/AMDGPU/AMDGPUCombinerHelper.cpp | 72 +++++++++++++++++++ llvm/lib/Target/AMDGPU/AMDGPUCombinerHelper.h | 4 ++ 3 files changed, 86 insertions(+) diff --git a/llvm/lib/Target/AMDGPU/AMDGPUCombine.td b/llvm/lib/Target/AMDGPU/AMDGPUCombine.td index 985fa8f1deff9..c1eea0ad9b707 100644 --- a/llvm/lib/Target/AMDGPU/AMDGPUCombine.td +++ b/llvm/lib/Target/AMDGPU/AMDGPUCombine.td @@ -124,6 +124,16 @@ def sign_extension_in_reg : GICombineRule< [{ return matchCombineSignExtendInReg(*${sign_inreg}, ${matchinfo}); }]), (apply [{ applyCombineSignExtendInReg(*${sign_inreg}, ${matchinfo}); }])>; +// Do the following combines : +// fmul x, select(y, A, B) -> ldexp (x, select i32 (y, a, b)) +// fmul x, select(y, -A, -B) -> ldexp ((fneg x), select i32 (y, a, b)) +def combine_fmul_with_select_to_ldexp : GICombineRule< + (defs root:$root, build_fn_matchinfo:$matchinfo), + (match (G_FMUL $dst, $x, $select):$root, + (G_SELECT $select, $y, $A, $B):$sel, + [{ return Helper.matchCombineFmulWithSelectToLdexp(*${root}, *${sel}, ${matchinfo}); }]), + (apply [{ Helper.applyBuildFn(*${root}, ${matchinfo}); }])>; + let Predicates = [Has16BitInsts, NotHasMed3_16] in { // For gfx8, expand f16-fmed3-as-f32 into a min/max f16 sequence. 
This diff --git a/llvm/lib/Target/AMDGPU/AMDGPUCombinerHelper.cpp b/llvm/lib/Target/AMDGPU/AMDGPUCombinerHelper.cpp index e5a376ab7357c..d582ee892a481 100644 --- a/llvm/lib/Target/AMDGPU/AMDGPUCombinerHelper.cpp +++ b/llvm/lib/Target/AMDGPU/AMDGPUCombinerHelper.cpp @@ -445,3 +445,75 @@ void AMDGPUCombinerHelper::applyExpandPromotedF16FMed3(MachineInstr &MI, Builder.buildFMinNumIEEE(MI.getOperand(0), B1, C1); MI.eraseFromParent(); } + +bool AMDGPUCombinerHelper::matchCombineFmulWithSelectToLdexp( + MachineInstr &MI, MachineInstr &Sel, + std::function &MatchInfo) { + assert(MI.getOpcode() == TargetOpcode::G_FMUL); + assert(Sel.getOpcode() == TargetOpcode::G_SELECT); + + Register Dst = MI.getOperand(0).getReg(); + LLT DestTy = MRI.getType(Dst); + LLT ScalarDestTy = DestTy.getScalarType(); + + if ((ScalarDestTy == LLT::float64() || ScalarDestTy == LLT::float32() || + ScalarDestTy == LLT::float16()) && + (MRI.hasOneNonDBGUse(Sel.getOperand(0).getReg()))) { + Register SelectCond = Sel.getOperand(1).getReg(); + Register SelectTrue = Sel.getOperand(2).getReg(); + Register SelectFalse = Sel.getOperand(3).getReg(); + + const auto SelectTrueCst = + DestTy.isVector() + ? getFConstantSplat(SelectTrue, MRI, /* allowUndef */ true) + : getFConstantVRegValWithLookThrough(SelectTrue, MRI); + if (!SelectTrueCst) + return false; + const auto SelectFalseCst = + DestTy.isVector() + ? getFConstantSplat(SelectFalse, MRI, /* allowUndef */ true) + : getFConstantVRegValWithLookThrough(SelectFalse, MRI); + if (!SelectFalseCst) + return false; + + if (SelectTrueCst->Value.isNegative() != SelectFalseCst->Value.isNegative()) + return false; + + // For f32, only non-inline constants should be transformed. 
+ const SIInstrInfo *TII = + (MI.getMF()->getSubtarget()).getInstrInfo(); + if (ScalarDestTy == LLT::float32() && + TII->isInlineConstant(SelectTrueCst->Value) && + TII->isInlineConstant(SelectFalseCst->Value)) + return false; + + int SelectTrueVal = SelectTrueCst->Value.getExactLog2Abs(); + if (SelectTrueVal == INT_MIN) + return false; + int SelectFalseVal = SelectFalseCst->Value.getExactLog2Abs(); + if (SelectFalseVal == INT_MIN) + return false; + + MatchInfo = [=, &MI](MachineIRBuilder &Builder) { + LLT IntDestTy = DestTy.changeElementType(LLT::scalar(32)); + auto NewSel = + Builder.buildSelect(IntDestTy, SelectCond, + Builder.buildConstant(IntDestTy, SelectTrueVal), + Builder.buildConstant(IntDestTy, SelectFalseVal)); + + if (SelectTrueCst->Value.isNegative()) { + auto NegX = Builder.buildFNeg( + DestTy, MI.getOperand(1).getReg(), + MRI.getVRegDef(MI.getOperand(1).getReg())->getFlags()); + Builder.buildFLdexp(Dst, NegX, NewSel, MI.getFlags()); + } else { + Builder.buildFLdexp(Dst, MI.getOperand(1).getReg(), NewSel, + MI.getFlags()); + } + }; + + return true; + } + + return false; +} diff --git a/llvm/lib/Target/AMDGPU/AMDGPUCombinerHelper.h b/llvm/lib/Target/AMDGPU/AMDGPUCombinerHelper.h index 6510abe9d2321..df03a9435b384 100644 --- a/llvm/lib/Target/AMDGPU/AMDGPUCombinerHelper.h +++ b/llvm/lib/Target/AMDGPU/AMDGPUCombinerHelper.h @@ -30,6 +30,10 @@ class AMDGPUCombinerHelper : public CombinerHelper { Register Src1, Register Src2); void applyExpandPromotedF16FMed3(MachineInstr &MI, Register Src0, Register Src1, Register Src2); + + bool matchCombineFmulWithSelectToLdexp( + MachineInstr &MI, MachineInstr &Sel, + std::function &MatchInfo); }; } // namespace llvm From b4b4fb6c1b56ba23a5710384aceccaf316c12f70 Mon Sep 17 00:00:00 2001 From: vg0204 Date: Wed, 18 Dec 2024 06:54:22 +0000 Subject: [PATCH 2/7] Invoking the fmulCombine in both the AMDGPUPreLegalCombine and AMDGPUPostLegalCombiner. 
--- llvm/lib/Target/AMDGPU/AMDGPUCombine.td | 14 +++++++------- llvm/lib/Target/AMDGPU/AMDGPUCombinerHelper.cpp | 6 +++--- llvm/lib/Target/AMDGPU/AMDGPUCombinerHelper.h | 2 +- 3 files changed, 11 insertions(+), 11 deletions(-) diff --git a/llvm/lib/Target/AMDGPU/AMDGPUCombine.td b/llvm/lib/Target/AMDGPU/AMDGPUCombine.td index c1eea0ad9b707..da47aaf8a3b5c 100644 --- a/llvm/lib/Target/AMDGPU/AMDGPUCombine.td +++ b/llvm/lib/Target/AMDGPU/AMDGPUCombine.td @@ -125,14 +125,14 @@ def sign_extension_in_reg : GICombineRule< (apply [{ applyCombineSignExtendInReg(*${sign_inreg}, ${matchinfo}); }])>; // Do the following combines : -// fmul x, select(y, A, B) -> ldexp (x, select i32 (y, a, b)) -// fmul x, select(y, -A, -B) -> ldexp ((fneg x), select i32 (y, a, b)) -def combine_fmul_with_select_to_ldexp : GICombineRule< +// fmul x, select(y, A, B) -> fldexp (x, select i32 (y, a, b)) +// fmul x, select(y, -A, -B) -> fldexp ((fneg x), select i32 (y, a, b)) +def combine_fmul_with_select_to_fldexp : GICombineRule< (defs root:$root, build_fn_matchinfo:$matchinfo), (match (G_FMUL $dst, $x, $select):$root, (G_SELECT $select, $y, $A, $B):$sel, - [{ return Helper.matchCombineFmulWithSelectToLdexp(*${root}, *${sel}, ${matchinfo}); }]), - (apply [{ Helper.applyBuildFn(*${root}, ${matchinfo}); }])>; + [{ return Helper.matchCombineFmulWithSelectToFldexp(*${root}, *${sel}, ${matchinfo}); }]), + (apply [{ Helper.applyBuildFn(*${root}, ${matchinfo}); }])>; let Predicates = [Has16BitInsts, NotHasMed3_16] in { @@ -163,13 +163,13 @@ def gfx8_combines : GICombineGroup<[expand_promoted_fmed3]>; def AMDGPUPreLegalizerCombiner: GICombiner< "AMDGPUPreLegalizerCombinerImpl", - [all_combines, clamp_i64_to_i16, foldable_fneg]> { + [all_combines, combine_fmul_with_select_to_fldexp, clamp_i64_to_i16, foldable_fneg]> { let CombineAllMethodName = "tryCombineAllImpl"; } def AMDGPUPostLegalizerCombiner: GICombiner< "AMDGPUPostLegalizerCombinerImpl", - [all_combines, gfx6gfx7_combines, gfx8_combines, + 
[all_combines, gfx6gfx7_combines, gfx8_combines, combine_fmul_with_select_to_fldexp, uchar_to_float, cvt_f32_ubyteN, remove_fcanonicalize, foldable_fneg, rcp_sqrt_to_rsq, fdiv_by_sqrt_to_rsq_f16, sign_extension_in_reg, smulu64]> { let CombineAllMethodName = "tryCombineAllImpl"; diff --git a/llvm/lib/Target/AMDGPU/AMDGPUCombinerHelper.cpp b/llvm/lib/Target/AMDGPU/AMDGPUCombinerHelper.cpp index d582ee892a481..06da76d5049e5 100644 --- a/llvm/lib/Target/AMDGPU/AMDGPUCombinerHelper.cpp +++ b/llvm/lib/Target/AMDGPU/AMDGPUCombinerHelper.cpp @@ -446,7 +446,7 @@ void AMDGPUCombinerHelper::applyExpandPromotedF16FMed3(MachineInstr &MI, MI.eraseFromParent(); } -bool AMDGPUCombinerHelper::matchCombineFmulWithSelectToLdexp( +bool AMDGPUCombinerHelper::matchCombineFmulWithSelectToFldexp( MachineInstr &MI, MachineInstr &Sel, std::function &MatchInfo) { assert(MI.getOpcode() == TargetOpcode::G_FMUL); @@ -465,13 +465,13 @@ bool AMDGPUCombinerHelper::matchCombineFmulWithSelectToLdexp( const auto SelectTrueCst = DestTy.isVector() - ? getFConstantSplat(SelectTrue, MRI, /* allowUndef */ true) + ? getFConstantSplat(SelectTrue, MRI, /*allowUndef=*/false) : getFConstantVRegValWithLookThrough(SelectTrue, MRI); if (!SelectTrueCst) return false; const auto SelectFalseCst = DestTy.isVector() - ? getFConstantSplat(SelectFalse, MRI, /* allowUndef */ true) + ? 
getFConstantSplat(SelectFalse, MRI, /*allowUndef=*/false) : getFConstantVRegValWithLookThrough(SelectFalse, MRI); if (!SelectFalseCst) return false; diff --git a/llvm/lib/Target/AMDGPU/AMDGPUCombinerHelper.h b/llvm/lib/Target/AMDGPU/AMDGPUCombinerHelper.h index df03a9435b384..9a0a4205ed54b 100644 --- a/llvm/lib/Target/AMDGPU/AMDGPUCombinerHelper.h +++ b/llvm/lib/Target/AMDGPU/AMDGPUCombinerHelper.h @@ -31,7 +31,7 @@ class AMDGPUCombinerHelper : public CombinerHelper { void applyExpandPromotedF16FMed3(MachineInstr &MI, Register Src0, Register Src1, Register Src2); - bool matchCombineFmulWithSelectToLdexp( + bool matchCombineFmulWithSelectToFldexp( MachineInstr &MI, MachineInstr &Sel, std::function &MatchInfo); }; From 795dae1099462a50d10790fd5d5e9bce6f1f571d Mon Sep 17 00:00:00 2001 From: vg0204 Date: Wed, 18 Dec 2024 07:06:17 +0000 Subject: [PATCH 3/7] updated the testCases, and increased the coverage of dagCombine-fmul-to-ldexp to include globalIsel in pipeline as well. --- llvm/test/CodeGen/AMDGPU/GlobalISel/fpow.ll | 769 +-- .../CodeGen/AMDGPU/GlobalISel/llvm.powi.ll | 95 +- .../CodeGen/AMDGPU/dagcombine-fmul-sel.ll | 5208 ++++++++++------- llvm/test/CodeGen/AMDGPU/fdiv_flags.f32.ll | 10 +- llvm/test/CodeGen/AMDGPU/fmul-to-ldexp.ll | 2114 ++++--- llvm/test/CodeGen/AMDGPU/fsqrt.f32.ll | 28 +- llvm/test/CodeGen/AMDGPU/llvm.exp2.ll | 1230 ++-- llvm/test/CodeGen/AMDGPU/llvm.log.ll | 1209 ++-- llvm/test/CodeGen/AMDGPU/llvm.log10.ll | 1209 ++-- llvm/test/CodeGen/AMDGPU/llvm.log2.ll | 1542 +++-- .../AMDGPU/pseudo-scalar-transcendental.ll | 97 +- 11 files changed, 7952 insertions(+), 5559 deletions(-) diff --git a/llvm/test/CodeGen/AMDGPU/GlobalISel/fpow.ll b/llvm/test/CodeGen/AMDGPU/GlobalISel/fpow.ll index 0577117e9d9e1..d81faf91801b0 100644 --- a/llvm/test/CodeGen/AMDGPU/GlobalISel/fpow.ll +++ b/llvm/test/CodeGen/AMDGPU/GlobalISel/fpow.ll @@ -10,10 +10,10 @@ define float @v_pow_f32(float %x, float %y) { ; GFX6: ; %bb.0: ; GFX6-NEXT: s_waitcnt vmcnt(0) expcnt(0) 
lgkmcnt(0) ; GFX6-NEXT: v_mov_b32_e32 v2, 0x800000 -; GFX6-NEXT: v_mov_b32_e32 v3, 0x4f800000 ; GFX6-NEXT: v_cmp_lt_f32_e32 vcc, v0, v2 -; GFX6-NEXT: v_cndmask_b32_e32 v2, 1.0, v3, vcc -; GFX6-NEXT: v_mul_f32_e32 v0, v0, v2 +; GFX6-NEXT: v_cndmask_b32_e64 v2, 0, 1, vcc +; GFX6-NEXT: v_lshlrev_b32_e32 v2, 5, v2 +; GFX6-NEXT: v_ldexp_f32_e32 v0, v0, v2 ; GFX6-NEXT: v_log_f32_e32 v0, v0 ; GFX6-NEXT: v_mov_b32_e32 v2, 0x42000000 ; GFX6-NEXT: v_cndmask_b32_e32 v2, 0, v2, vcc @@ -25,19 +25,19 @@ define float @v_pow_f32(float %x, float %y) { ; GFX6-NEXT: v_cndmask_b32_e32 v1, 0, v2, vcc ; GFX6-NEXT: v_add_f32_e32 v0, v0, v1 ; GFX6-NEXT: v_exp_f32_e32 v0, v0 -; GFX6-NEXT: v_mov_b32_e32 v1, 0x1f800000 -; GFX6-NEXT: v_cndmask_b32_e32 v1, 1.0, v1, vcc -; GFX6-NEXT: v_mul_f32_e32 v0, v0, v1 +; GFX6-NEXT: v_not_b32_e32 v1, 63 +; GFX6-NEXT: v_cndmask_b32_e32 v1, 0, v1, vcc +; GFX6-NEXT: v_ldexp_f32_e32 v0, v0, v1 ; GFX6-NEXT: s_setpc_b64 s[30:31] ; ; GFX8-LABEL: v_pow_f32: ; GFX8: ; %bb.0: ; GFX8-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) ; GFX8-NEXT: v_mov_b32_e32 v2, 0x800000 -; GFX8-NEXT: v_mov_b32_e32 v3, 0x4f800000 ; GFX8-NEXT: v_cmp_lt_f32_e32 vcc, v0, v2 -; GFX8-NEXT: v_cndmask_b32_e32 v2, 1.0, v3, vcc -; GFX8-NEXT: v_mul_f32_e32 v0, v0, v2 +; GFX8-NEXT: v_cndmask_b32_e64 v2, 0, 1, vcc +; GFX8-NEXT: v_lshlrev_b32_e32 v2, 5, v2 +; GFX8-NEXT: v_ldexp_f32 v0, v0, v2 ; GFX8-NEXT: v_log_f32_e32 v0, v0 ; GFX8-NEXT: v_mov_b32_e32 v2, 0x42000000 ; GFX8-NEXT: v_cndmask_b32_e32 v2, 0, v2, vcc @@ -49,19 +49,19 @@ define float @v_pow_f32(float %x, float %y) { ; GFX8-NEXT: v_cndmask_b32_e32 v1, 0, v2, vcc ; GFX8-NEXT: v_add_f32_e32 v0, v0, v1 ; GFX8-NEXT: v_exp_f32_e32 v0, v0 -; GFX8-NEXT: v_mov_b32_e32 v1, 0x1f800000 -; GFX8-NEXT: v_cndmask_b32_e32 v1, 1.0, v1, vcc -; GFX8-NEXT: v_mul_f32_e32 v0, v0, v1 +; GFX8-NEXT: v_not_b32_e32 v1, 63 +; GFX8-NEXT: v_cndmask_b32_e32 v1, 0, v1, vcc +; GFX8-NEXT: v_ldexp_f32 v0, v0, v1 ; GFX8-NEXT: s_setpc_b64 s[30:31] ; ; GFX9-LABEL: v_pow_f32: 
; GFX9: ; %bb.0: ; GFX9-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) ; GFX9-NEXT: v_mov_b32_e32 v2, 0x800000 -; GFX9-NEXT: v_mov_b32_e32 v3, 0x4f800000 ; GFX9-NEXT: v_cmp_lt_f32_e32 vcc, v0, v2 -; GFX9-NEXT: v_cndmask_b32_e32 v2, 1.0, v3, vcc -; GFX9-NEXT: v_mul_f32_e32 v0, v0, v2 +; GFX9-NEXT: v_cndmask_b32_e64 v2, 0, 1, vcc +; GFX9-NEXT: v_lshlrev_b32_e32 v2, 5, v2 +; GFX9-NEXT: v_ldexp_f32 v0, v0, v2 ; GFX9-NEXT: v_log_f32_e32 v0, v0 ; GFX9-NEXT: v_mov_b32_e32 v2, 0x42000000 ; GFX9-NEXT: v_cndmask_b32_e32 v2, 0, v2, vcc @@ -73,17 +73,18 @@ define float @v_pow_f32(float %x, float %y) { ; GFX9-NEXT: v_cndmask_b32_e32 v1, 0, v2, vcc ; GFX9-NEXT: v_add_f32_e32 v0, v0, v1 ; GFX9-NEXT: v_exp_f32_e32 v0, v0 -; GFX9-NEXT: v_mov_b32_e32 v1, 0x1f800000 -; GFX9-NEXT: v_cndmask_b32_e32 v1, 1.0, v1, vcc -; GFX9-NEXT: v_mul_f32_e32 v0, v0, v1 +; GFX9-NEXT: v_not_b32_e32 v1, 63 +; GFX9-NEXT: v_cndmask_b32_e32 v1, 0, v1, vcc +; GFX9-NEXT: v_ldexp_f32 v0, v0, v1 ; GFX9-NEXT: s_setpc_b64 s[30:31] ; ; GFX10-LABEL: v_pow_f32: ; GFX10: ; %bb.0: ; GFX10-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) ; GFX10-NEXT: v_cmp_gt_f32_e32 vcc_lo, 0x800000, v0 -; GFX10-NEXT: v_cndmask_b32_e64 v2, 1.0, 0x4f800000, vcc_lo -; GFX10-NEXT: v_mul_f32_e32 v0, v0, v2 +; GFX10-NEXT: v_cndmask_b32_e64 v2, 0, 1, vcc_lo +; GFX10-NEXT: v_lshlrev_b32_e32 v2, 5, v2 +; GFX10-NEXT: v_ldexp_f32 v0, v0, v2 ; GFX10-NEXT: v_cndmask_b32_e64 v2, 0, 0x42000000, vcc_lo ; GFX10-NEXT: v_log_f32_e32 v0, v0 ; GFX10-NEXT: v_sub_f32_e32 v0, v0, v2 @@ -91,32 +92,34 @@ define float @v_pow_f32(float %x, float %y) { ; GFX10-NEXT: v_cmp_gt_f32_e32 vcc_lo, 0xc2fc0000, v0 ; GFX10-NEXT: v_cndmask_b32_e64 v1, 0, 0x42800000, vcc_lo ; GFX10-NEXT: v_add_f32_e32 v0, v0, v1 -; GFX10-NEXT: v_cndmask_b32_e64 v1, 1.0, 0x1f800000, vcc_lo +; GFX10-NEXT: v_cndmask_b32_e64 v1, 0, 0xffffffc0, vcc_lo ; GFX10-NEXT: v_exp_f32_e32 v0, v0 -; GFX10-NEXT: v_mul_f32_e32 v0, v0, v1 +; GFX10-NEXT: v_ldexp_f32 v0, v0, v1 ; GFX10-NEXT: s_setpc_b64 s[30:31] 
; ; GFX11-LABEL: v_pow_f32: ; GFX11: ; %bb.0: ; GFX11-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) ; GFX11-NEXT: v_cmp_gt_f32_e32 vcc_lo, 0x800000, v0 -; GFX11-NEXT: v_cndmask_b32_e64 v2, 1.0, 0x4f800000, vcc_lo -; GFX11-NEXT: s_delay_alu instid0(VALU_DEP_1) | instskip(SKIP_1) | instid1(VALU_DEP_2) -; GFX11-NEXT: v_mul_f32_e32 v0, v0, v2 +; GFX11-NEXT: v_cndmask_b32_e64 v2, 0, 1, vcc_lo +; GFX11-NEXT: s_delay_alu instid0(VALU_DEP_1) | instskip(NEXT) | instid1(VALU_DEP_1) +; GFX11-NEXT: v_lshlrev_b32_e32 v2, 5, v2 +; GFX11-NEXT: v_ldexp_f32 v0, v0, v2 ; GFX11-NEXT: v_cndmask_b32_e64 v2, 0, 0x42000000, vcc_lo +; GFX11-NEXT: s_delay_alu instid0(VALU_DEP_2) | instskip(SKIP_2) | instid1(VALU_DEP_1) ; GFX11-NEXT: v_log_f32_e32 v0, v0 ; GFX11-NEXT: s_waitcnt_depctr 0xfff ; GFX11-NEXT: v_sub_f32_e32 v0, v0, v2 -; GFX11-NEXT: s_delay_alu instid0(VALU_DEP_1) | instskip(NEXT) | instid1(VALU_DEP_1) ; GFX11-NEXT: v_mul_dx9_zero_f32_e32 v0, v0, v1 +; GFX11-NEXT: s_delay_alu instid0(VALU_DEP_1) | instskip(SKIP_1) | instid1(VALU_DEP_1) ; GFX11-NEXT: v_cmp_gt_f32_e32 vcc_lo, 0xc2fc0000, v0 ; GFX11-NEXT: v_cndmask_b32_e64 v1, 0, 0x42800000, vcc_lo -; GFX11-NEXT: s_delay_alu instid0(VALU_DEP_1) | instskip(SKIP_1) | instid1(VALU_DEP_2) ; GFX11-NEXT: v_add_f32_e32 v0, v0, v1 -; GFX11-NEXT: v_cndmask_b32_e64 v1, 1.0, 0x1f800000, vcc_lo +; GFX11-NEXT: v_cndmask_b32_e64 v1, 0, 0xffffffc0, vcc_lo +; GFX11-NEXT: s_delay_alu instid0(VALU_DEP_2) ; GFX11-NEXT: v_exp_f32_e32 v0, v0 ; GFX11-NEXT: s_waitcnt_depctr 0xfff -; GFX11-NEXT: v_mul_f32_e32 v0, v0, v1 +; GFX11-NEXT: v_ldexp_f32 v0, v0, v1 ; GFX11-NEXT: s_setpc_b64 s[30:31] %pow = call float @llvm.pow.f32(float %x, float %y) ret float %pow @@ -127,111 +130,114 @@ define <2 x float> @v_pow_v2f32(<2 x float> %x, <2 x float> %y) { ; GFX6: ; %bb.0: ; GFX6-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) ; GFX6-NEXT: v_mov_b32_e32 v4, 0x800000 -; GFX6-NEXT: v_mov_b32_e32 v5, 0x4f800000 ; GFX6-NEXT: v_cmp_lt_f32_e32 vcc, v0, v4 -; GFX6-NEXT: 
v_cndmask_b32_e32 v6, 1.0, v5, vcc +; GFX6-NEXT: v_cndmask_b32_e64 v5, 0, 1, vcc ; GFX6-NEXT: v_cmp_lt_f32_e64 s[4:5], v1, v4 -; GFX6-NEXT: v_mul_f32_e32 v0, v0, v6 -; GFX6-NEXT: v_cndmask_b32_e64 v4, 1.0, v5, s[4:5] +; GFX6-NEXT: v_lshlrev_b32_e32 v5, 5, v5 +; GFX6-NEXT: v_cndmask_b32_e64 v4, 0, 1, s[4:5] +; GFX6-NEXT: v_ldexp_f32_e32 v0, v0, v5 +; GFX6-NEXT: v_lshlrev_b32_e32 v4, 5, v4 ; GFX6-NEXT: v_log_f32_e32 v0, v0 -; GFX6-NEXT: v_mul_f32_e32 v1, v1, v4 +; GFX6-NEXT: v_ldexp_f32_e32 v1, v1, v4 ; GFX6-NEXT: v_log_f32_e32 v1, v1 -; GFX6-NEXT: v_mov_b32_e32 v6, 0x42000000 -; GFX6-NEXT: v_cndmask_b32_e32 v7, 0, v6, vcc -; GFX6-NEXT: v_sub_f32_e32 v0, v0, v7 -; GFX6-NEXT: v_cndmask_b32_e64 v5, 0, v6, s[4:5] +; GFX6-NEXT: v_mov_b32_e32 v5, 0x42000000 +; GFX6-NEXT: v_cndmask_b32_e32 v6, 0, v5, vcc +; GFX6-NEXT: v_sub_f32_e32 v0, v0, v6 +; GFX6-NEXT: v_cndmask_b32_e64 v5, 0, v5, s[4:5] ; GFX6-NEXT: v_mul_legacy_f32_e32 v0, v0, v2 ; GFX6-NEXT: v_mov_b32_e32 v2, 0xc2fc0000 ; GFX6-NEXT: v_sub_f32_e32 v1, v1, v5 -; GFX6-NEXT: v_mov_b32_e32 v7, 0x42800000 +; GFX6-NEXT: v_mov_b32_e32 v6, 0x42800000 ; GFX6-NEXT: v_cmp_lt_f32_e32 vcc, v0, v2 ; GFX6-NEXT: v_mul_legacy_f32_e32 v1, v1, v3 -; GFX6-NEXT: v_cndmask_b32_e32 v8, 0, v7, vcc +; GFX6-NEXT: v_cndmask_b32_e32 v7, 0, v6, vcc ; GFX6-NEXT: v_cmp_lt_f32_e64 s[4:5], v1, v2 -; GFX6-NEXT: v_add_f32_e32 v0, v0, v8 -; GFX6-NEXT: v_cndmask_b32_e64 v2, 0, v7, s[4:5] +; GFX6-NEXT: v_add_f32_e32 v0, v0, v7 +; GFX6-NEXT: v_cndmask_b32_e64 v2, 0, v6, s[4:5] ; GFX6-NEXT: v_exp_f32_e32 v0, v0 ; GFX6-NEXT: v_add_f32_e32 v1, v1, v2 ; GFX6-NEXT: v_exp_f32_e32 v1, v1 -; GFX6-NEXT: v_mov_b32_e32 v4, 0x1f800000 -; GFX6-NEXT: v_cndmask_b32_e32 v2, 1.0, v4, vcc -; GFX6-NEXT: v_mul_f32_e32 v0, v0, v2 -; GFX6-NEXT: v_cndmask_b32_e64 v2, 1.0, v4, s[4:5] -; GFX6-NEXT: v_mul_f32_e32 v1, v1, v2 +; GFX6-NEXT: v_not_b32_e32 v4, 63 +; GFX6-NEXT: v_cndmask_b32_e32 v2, 0, v4, vcc +; GFX6-NEXT: v_ldexp_f32_e32 v0, v0, v2 +; GFX6-NEXT: v_cndmask_b32_e64 v2, 
0, v4, s[4:5] +; GFX6-NEXT: v_ldexp_f32_e32 v1, v1, v2 ; GFX6-NEXT: s_setpc_b64 s[30:31] ; ; GFX8-LABEL: v_pow_v2f32: ; GFX8: ; %bb.0: ; GFX8-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) ; GFX8-NEXT: v_mov_b32_e32 v4, 0x800000 -; GFX8-NEXT: v_mov_b32_e32 v5, 0x4f800000 ; GFX8-NEXT: v_cmp_lt_f32_e32 vcc, v0, v4 -; GFX8-NEXT: v_cndmask_b32_e32 v6, 1.0, v5, vcc +; GFX8-NEXT: v_cndmask_b32_e64 v5, 0, 1, vcc ; GFX8-NEXT: v_cmp_lt_f32_e64 s[4:5], v1, v4 -; GFX8-NEXT: v_mul_f32_e32 v0, v0, v6 -; GFX8-NEXT: v_cndmask_b32_e64 v4, 1.0, v5, s[4:5] +; GFX8-NEXT: v_lshlrev_b32_e32 v5, 5, v5 +; GFX8-NEXT: v_cndmask_b32_e64 v4, 0, 1, s[4:5] +; GFX8-NEXT: v_ldexp_f32 v0, v0, v5 +; GFX8-NEXT: v_lshlrev_b32_e32 v4, 5, v4 ; GFX8-NEXT: v_log_f32_e32 v0, v0 -; GFX8-NEXT: v_mul_f32_e32 v1, v1, v4 +; GFX8-NEXT: v_ldexp_f32 v1, v1, v4 ; GFX8-NEXT: v_log_f32_e32 v1, v1 -; GFX8-NEXT: v_mov_b32_e32 v6, 0x42000000 -; GFX8-NEXT: v_cndmask_b32_e32 v7, 0, v6, vcc -; GFX8-NEXT: v_sub_f32_e32 v0, v0, v7 -; GFX8-NEXT: v_cndmask_b32_e64 v5, 0, v6, s[4:5] +; GFX8-NEXT: v_mov_b32_e32 v5, 0x42000000 +; GFX8-NEXT: v_cndmask_b32_e32 v6, 0, v5, vcc +; GFX8-NEXT: v_sub_f32_e32 v0, v0, v6 +; GFX8-NEXT: v_cndmask_b32_e64 v5, 0, v5, s[4:5] ; GFX8-NEXT: v_mul_legacy_f32_e32 v0, v0, v2 ; GFX8-NEXT: v_mov_b32_e32 v2, 0xc2fc0000 ; GFX8-NEXT: v_sub_f32_e32 v1, v1, v5 -; GFX8-NEXT: v_mov_b32_e32 v7, 0x42800000 +; GFX8-NEXT: v_mov_b32_e32 v6, 0x42800000 ; GFX8-NEXT: v_cmp_lt_f32_e32 vcc, v0, v2 ; GFX8-NEXT: v_mul_legacy_f32_e32 v1, v1, v3 -; GFX8-NEXT: v_cndmask_b32_e32 v8, 0, v7, vcc +; GFX8-NEXT: v_cndmask_b32_e32 v7, 0, v6, vcc ; GFX8-NEXT: v_cmp_lt_f32_e64 s[4:5], v1, v2 -; GFX8-NEXT: v_add_f32_e32 v0, v0, v8 -; GFX8-NEXT: v_cndmask_b32_e64 v2, 0, v7, s[4:5] +; GFX8-NEXT: v_add_f32_e32 v0, v0, v7 +; GFX8-NEXT: v_cndmask_b32_e64 v2, 0, v6, s[4:5] ; GFX8-NEXT: v_exp_f32_e32 v0, v0 ; GFX8-NEXT: v_add_f32_e32 v1, v1, v2 ; GFX8-NEXT: v_exp_f32_e32 v1, v1 -; GFX8-NEXT: v_mov_b32_e32 v4, 0x1f800000 -; GFX8-NEXT: 
v_cndmask_b32_e32 v2, 1.0, v4, vcc -; GFX8-NEXT: v_mul_f32_e32 v0, v0, v2 -; GFX8-NEXT: v_cndmask_b32_e64 v2, 1.0, v4, s[4:5] -; GFX8-NEXT: v_mul_f32_e32 v1, v1, v2 +; GFX8-NEXT: v_not_b32_e32 v4, 63 +; GFX8-NEXT: v_cndmask_b32_e32 v2, 0, v4, vcc +; GFX8-NEXT: v_ldexp_f32 v0, v0, v2 +; GFX8-NEXT: v_cndmask_b32_e64 v2, 0, v4, s[4:5] +; GFX8-NEXT: v_ldexp_f32 v1, v1, v2 ; GFX8-NEXT: s_setpc_b64 s[30:31] ; ; GFX9-LABEL: v_pow_v2f32: ; GFX9: ; %bb.0: ; GFX9-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) ; GFX9-NEXT: v_mov_b32_e32 v4, 0x800000 -; GFX9-NEXT: v_mov_b32_e32 v5, 0x4f800000 ; GFX9-NEXT: v_cmp_lt_f32_e32 vcc, v0, v4 -; GFX9-NEXT: v_cndmask_b32_e32 v6, 1.0, v5, vcc +; GFX9-NEXT: v_cndmask_b32_e64 v5, 0, 1, vcc ; GFX9-NEXT: v_cmp_lt_f32_e64 s[4:5], v1, v4 -; GFX9-NEXT: v_mul_f32_e32 v0, v0, v6 -; GFX9-NEXT: v_cndmask_b32_e64 v4, 1.0, v5, s[4:5] +; GFX9-NEXT: v_lshlrev_b32_e32 v5, 5, v5 +; GFX9-NEXT: v_cndmask_b32_e64 v4, 0, 1, s[4:5] +; GFX9-NEXT: v_ldexp_f32 v0, v0, v5 +; GFX9-NEXT: v_lshlrev_b32_e32 v4, 5, v4 ; GFX9-NEXT: v_log_f32_e32 v0, v0 -; GFX9-NEXT: v_mul_f32_e32 v1, v1, v4 +; GFX9-NEXT: v_ldexp_f32 v1, v1, v4 ; GFX9-NEXT: v_log_f32_e32 v1, v1 -; GFX9-NEXT: v_mov_b32_e32 v6, 0x42000000 -; GFX9-NEXT: v_cndmask_b32_e32 v7, 0, v6, vcc -; GFX9-NEXT: v_sub_f32_e32 v0, v0, v7 -; GFX9-NEXT: v_cndmask_b32_e64 v5, 0, v6, s[4:5] +; GFX9-NEXT: v_mov_b32_e32 v5, 0x42000000 +; GFX9-NEXT: v_cndmask_b32_e32 v6, 0, v5, vcc +; GFX9-NEXT: v_sub_f32_e32 v0, v0, v6 +; GFX9-NEXT: v_cndmask_b32_e64 v5, 0, v5, s[4:5] ; GFX9-NEXT: v_mul_legacy_f32_e32 v0, v0, v2 ; GFX9-NEXT: v_mov_b32_e32 v2, 0xc2fc0000 ; GFX9-NEXT: v_sub_f32_e32 v1, v1, v5 -; GFX9-NEXT: v_mov_b32_e32 v7, 0x42800000 +; GFX9-NEXT: v_mov_b32_e32 v6, 0x42800000 ; GFX9-NEXT: v_cmp_lt_f32_e32 vcc, v0, v2 ; GFX9-NEXT: v_mul_legacy_f32_e32 v1, v1, v3 -; GFX9-NEXT: v_cndmask_b32_e32 v8, 0, v7, vcc +; GFX9-NEXT: v_cndmask_b32_e32 v7, 0, v6, vcc ; GFX9-NEXT: v_cmp_lt_f32_e64 s[4:5], v1, v2 -; GFX9-NEXT: v_add_f32_e32 
v0, v0, v8 -; GFX9-NEXT: v_cndmask_b32_e64 v2, 0, v7, s[4:5] +; GFX9-NEXT: v_add_f32_e32 v0, v0, v7 +; GFX9-NEXT: v_cndmask_b32_e64 v2, 0, v6, s[4:5] ; GFX9-NEXT: v_exp_f32_e32 v0, v0 ; GFX9-NEXT: v_add_f32_e32 v1, v1, v2 ; GFX9-NEXT: v_exp_f32_e32 v1, v1 -; GFX9-NEXT: v_mov_b32_e32 v4, 0x1f800000 -; GFX9-NEXT: v_cndmask_b32_e32 v2, 1.0, v4, vcc -; GFX9-NEXT: v_mul_f32_e32 v0, v0, v2 -; GFX9-NEXT: v_cndmask_b32_e64 v2, 1.0, v4, s[4:5] -; GFX9-NEXT: v_mul_f32_e32 v1, v1, v2 +; GFX9-NEXT: v_not_b32_e32 v4, 63 +; GFX9-NEXT: v_cndmask_b32_e32 v2, 0, v4, vcc +; GFX9-NEXT: v_ldexp_f32 v0, v0, v2 +; GFX9-NEXT: v_cndmask_b32_e64 v2, 0, v4, s[4:5] +; GFX9-NEXT: v_ldexp_f32 v1, v1, v2 ; GFX9-NEXT: s_setpc_b64 s[30:31] ; ; GFX10-LABEL: v_pow_v2f32: @@ -239,10 +245,12 @@ define <2 x float> @v_pow_v2f32(<2 x float> %x, <2 x float> %y) { ; GFX10-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) ; GFX10-NEXT: v_cmp_gt_f32_e32 vcc_lo, 0x800000, v0 ; GFX10-NEXT: v_cmp_gt_f32_e64 s4, 0x800000, v1 -; GFX10-NEXT: v_cndmask_b32_e64 v4, 1.0, 0x4f800000, vcc_lo -; GFX10-NEXT: v_cndmask_b32_e64 v5, 1.0, 0x4f800000, s4 -; GFX10-NEXT: v_mul_f32_e32 v0, v0, v4 -; GFX10-NEXT: v_mul_f32_e32 v1, v1, v5 +; GFX10-NEXT: v_cndmask_b32_e64 v4, 0, 1, vcc_lo +; GFX10-NEXT: v_cndmask_b32_e64 v5, 0, 1, s4 +; GFX10-NEXT: v_lshlrev_b32_e32 v4, 5, v4 +; GFX10-NEXT: v_lshlrev_b32_e32 v5, 5, v5 +; GFX10-NEXT: v_ldexp_f32 v0, v0, v4 +; GFX10-NEXT: v_ldexp_f32 v1, v1, v5 ; GFX10-NEXT: v_cndmask_b32_e64 v4, 0, 0x42000000, vcc_lo ; GFX10-NEXT: v_cndmask_b32_e64 v5, 0, 0x42000000, s4 ; GFX10-NEXT: v_log_f32_e32 v0, v0 @@ -257,46 +265,54 @@ define <2 x float> @v_pow_v2f32(<2 x float> %x, <2 x float> %y) { ; GFX10-NEXT: v_cndmask_b32_e64 v3, 0, 0x42800000, s4 ; GFX10-NEXT: v_add_f32_e32 v0, v0, v2 ; GFX10-NEXT: v_add_f32_e32 v1, v1, v3 -; GFX10-NEXT: v_cndmask_b32_e64 v2, 1.0, 0x1f800000, vcc_lo -; GFX10-NEXT: v_cndmask_b32_e64 v3, 1.0, 0x1f800000, s4 +; GFX10-NEXT: v_cndmask_b32_e64 v2, 0, 0xffffffc0, vcc_lo +; 
GFX10-NEXT: v_cndmask_b32_e64 v3, 0, 0xffffffc0, s4 ; GFX10-NEXT: v_exp_f32_e32 v0, v0 ; GFX10-NEXT: v_exp_f32_e32 v1, v1 -; GFX10-NEXT: v_mul_f32_e32 v0, v0, v2 -; GFX10-NEXT: v_mul_f32_e32 v1, v1, v3 +; GFX10-NEXT: v_ldexp_f32 v0, v0, v2 +; GFX10-NEXT: v_ldexp_f32 v1, v1, v3 ; GFX10-NEXT: s_setpc_b64 s[30:31] ; ; GFX11-LABEL: v_pow_v2f32: ; GFX11: ; %bb.0: ; GFX11-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX11-NEXT: v_cmp_gt_f32_e32 vcc_lo, 0x800000, v0 ; GFX11-NEXT: v_cmp_gt_f32_e64 s0, 0x800000, v1 -; GFX11-NEXT: v_cndmask_b32_e64 v4, 1.0, 0x4f800000, vcc_lo -; GFX11-NEXT: s_delay_alu instid0(VALU_DEP_2) | instskip(NEXT) | instid1(VALU_DEP_1) -; GFX11-NEXT: v_cndmask_b32_e64 v5, 1.0, 0x4f800000, s0 -; GFX11-NEXT: v_dual_mul_f32 v0, v0, v4 :: v_dual_mul_f32 v1, v1, v5 -; GFX11-NEXT: v_cndmask_b32_e64 v4, 0, 0x42000000, vcc_lo +; GFX11-NEXT: v_cmp_gt_f32_e32 vcc_lo, 0x800000, v0 +; GFX11-NEXT: s_delay_alu instid0(VALU_DEP_2) | instskip(SKIP_1) | instid1(VALU_DEP_2) +; GFX11-NEXT: v_cndmask_b32_e64 v5, 0, 1, s0 +; GFX11-NEXT: v_cndmask_b32_e64 v4, 0, 1, vcc_lo +; GFX11-NEXT: v_lshlrev_b32_e32 v5, 5, v5 +; GFX11-NEXT: s_delay_alu instid0(VALU_DEP_1) | instskip(SKIP_1) | instid1(VALU_DEP_2) +; GFX11-NEXT: v_ldexp_f32 v1, v1, v5 ; GFX11-NEXT: v_cndmask_b32_e64 v5, 0, 0x42000000, s0 -; GFX11-NEXT: s_delay_alu instid0(VALU_DEP_3) | instskip(NEXT) | instid1(VALU_DEP_3) -; GFX11-NEXT: v_log_f32_e32 v0, v0 ; GFX11-NEXT: v_log_f32_e32 v1, v1 ; GFX11-NEXT: s_waitcnt_depctr 0xfff -; GFX11-NEXT: v_dual_sub_f32 v0, v0, v4 :: v_dual_sub_f32 v1, v1, v5 -; GFX11-NEXT: s_delay_alu instid0(VALU_DEP_1) | instskip(NEXT) | instid1(VALU_DEP_1) -; GFX11-NEXT: v_dual_mul_dx9_zero_f32 v0, v0, v2 :: v_dual_mul_dx9_zero_f32 v1, v1, v3 -; GFX11-NEXT: v_cmp_gt_f32_e32 vcc_lo, 0xc2fc0000, v0 -; GFX11-NEXT: s_delay_alu instid0(VALU_DEP_2) | instskip(SKIP_1) | instid1(VALU_DEP_2) +; GFX11-NEXT: v_dual_sub_f32 v1, v1, v5 :: v_dual_lshlrev_b32 v4, 5, v4 +; GFX11-NEXT: s_delay_alu 
instid0(VALU_DEP_1) | instskip(SKIP_1) | instid1(VALU_DEP_3) +; GFX11-NEXT: v_ldexp_f32 v0, v0, v4 +; GFX11-NEXT: v_cndmask_b32_e64 v4, 0, 0x42000000, vcc_lo +; GFX11-NEXT: v_mul_dx9_zero_f32_e32 v1, v1, v3 +; GFX11-NEXT: s_delay_alu instid0(VALU_DEP_3) | instskip(NEXT) | instid1(VALU_DEP_1) +; GFX11-NEXT: v_log_f32_e32 v0, v0 ; GFX11-NEXT: v_cmp_gt_f32_e64 s0, 0xc2fc0000, v1 -; GFX11-NEXT: v_cndmask_b32_e64 v2, 0, 0x42800000, vcc_lo +; GFX11-NEXT: s_delay_alu instid0(VALU_DEP_1) | instskip(SKIP_2) | instid1(VALU_DEP_1) ; GFX11-NEXT: v_cndmask_b32_e64 v3, 0, 0x42800000, s0 -; GFX11-NEXT: s_delay_alu instid0(VALU_DEP_1) | instskip(SKIP_2) | instid1(VALU_DEP_3) -; GFX11-NEXT: v_dual_add_f32 v0, v0, v2 :: v_dual_add_f32 v1, v1, v3 -; GFX11-NEXT: v_cndmask_b32_e64 v2, 1.0, 0x1f800000, vcc_lo -; GFX11-NEXT: v_cndmask_b32_e64 v3, 1.0, 0x1f800000, s0 -; GFX11-NEXT: v_exp_f32_e32 v0, v0 -; GFX11-NEXT: s_delay_alu instid0(VALU_DEP_3) +; GFX11-NEXT: s_waitcnt_depctr 0xfff +; GFX11-NEXT: v_dual_sub_f32 v0, v0, v4 :: v_dual_add_f32 v1, v1, v3 +; GFX11-NEXT: v_mul_dx9_zero_f32_e32 v0, v0, v2 +; GFX11-NEXT: v_cndmask_b32_e64 v3, 0, 0xffffffc0, s0 +; GFX11-NEXT: s_delay_alu instid0(VALU_DEP_3) | instskip(NEXT) | instid1(VALU_DEP_2) ; GFX11-NEXT: v_exp_f32_e32 v1, v1 +; GFX11-NEXT: v_cmp_gt_f32_e32 vcc_lo, 0xc2fc0000, v0 +; GFX11-NEXT: v_cndmask_b32_e64 v2, 0, 0x42800000, vcc_lo ; GFX11-NEXT: s_waitcnt_depctr 0xfff -; GFX11-NEXT: v_dual_mul_f32 v0, v0, v2 :: v_dual_mul_f32 v1, v1, v3 +; GFX11-NEXT: v_ldexp_f32 v1, v1, v3 +; GFX11-NEXT: v_add_f32_e32 v0, v0, v2 +; GFX11-NEXT: v_cndmask_b32_e64 v2, 0, 0xffffffc0, vcc_lo +; GFX11-NEXT: s_delay_alu instid0(VALU_DEP_2) +; GFX11-NEXT: v_exp_f32_e32 v0, v0 +; GFX11-NEXT: s_waitcnt_depctr 0xfff +; GFX11-NEXT: v_ldexp_f32 v0, v0, v2 ; GFX11-NEXT: s_setpc_b64 s[30:31] %pow = call <2 x float> @llvm.pow.v2f32(<2 x float> %x, <2 x float> %y) ret <2 x float> %pow @@ -316,9 +332,9 @@ define half @v_pow_f16(half %x, half %y) { ; GFX6-NEXT: 
v_cndmask_b32_e32 v1, 0, v3, vcc ; GFX6-NEXT: v_add_f32_e32 v0, v0, v1 ; GFX6-NEXT: v_exp_f32_e32 v0, v0 -; GFX6-NEXT: v_mov_b32_e32 v1, 0x1f800000 -; GFX6-NEXT: v_cndmask_b32_e32 v1, 1.0, v1, vcc -; GFX6-NEXT: v_mul_f32_e32 v0, v0, v1 +; GFX6-NEXT: v_not_b32_e32 v1, 63 +; GFX6-NEXT: v_cndmask_b32_e32 v1, 0, v1, vcc +; GFX6-NEXT: v_ldexp_f32_e32 v0, v0, v1 ; GFX6-NEXT: v_cvt_f16_f32_e32 v0, v0 ; GFX6-NEXT: s_setpc_b64 s[30:31] ; @@ -388,18 +404,18 @@ define <2 x half> @v_pow_v2f16(<2 x half> %x, <2 x half> %y) { ; GFX6-NEXT: v_cndmask_b32_e32 v2, 0, v5, vcc ; GFX6-NEXT: v_add_f32_e32 v0, v0, v2 ; GFX6-NEXT: v_cvt_f32_f16_e32 v2, v3 -; GFX6-NEXT: v_mov_b32_e32 v3, 0x1f800000 -; GFX6-NEXT: v_cndmask_b32_e32 v6, 1.0, v3, vcc +; GFX6-NEXT: v_not_b32_e32 v3, 63 +; GFX6-NEXT: v_cndmask_b32_e32 v6, 0, v3, vcc ; GFX6-NEXT: v_exp_f32_e32 v0, v0 ; GFX6-NEXT: v_mul_legacy_f32_e32 v1, v1, v2 ; GFX6-NEXT: v_cmp_lt_f32_e32 vcc, v1, v4 ; GFX6-NEXT: v_cndmask_b32_e32 v2, 0, v5, vcc ; GFX6-NEXT: v_add_f32_e32 v1, v1, v2 ; GFX6-NEXT: v_exp_f32_e32 v1, v1 -; GFX6-NEXT: v_cndmask_b32_e32 v2, 1.0, v3, vcc -; GFX6-NEXT: v_mul_f32_e32 v0, v0, v6 +; GFX6-NEXT: v_cndmask_b32_e32 v2, 0, v3, vcc +; GFX6-NEXT: v_ldexp_f32_e32 v0, v0, v6 ; GFX6-NEXT: v_cvt_f16_f32_e32 v0, v0 -; GFX6-NEXT: v_mul_f32_e32 v1, v1, v2 +; GFX6-NEXT: v_ldexp_f32_e32 v1, v1, v2 ; GFX6-NEXT: v_cvt_f16_f32_e32 v1, v1 ; GFX6-NEXT: s_setpc_b64 s[30:31] ; @@ -508,17 +524,17 @@ define <2 x half> @v_pow_v2f16_fneg_lhs(<2 x half> %x, <2 x half> %y) { ; GFX6-NEXT: v_cmp_lt_f32_e32 vcc, v1, v2 ; GFX6-NEXT: v_cndmask_b32_e32 v5, 0, v4, vcc ; GFX6-NEXT: v_add_f32_e32 v1, v1, v5 -; GFX6-NEXT: v_mov_b32_e32 v5, 0x1f800000 +; GFX6-NEXT: v_not_b32_e32 v5, 63 ; GFX6-NEXT: v_mul_legacy_f32_e32 v0, v0, v3 -; GFX6-NEXT: v_cndmask_b32_e32 v6, 1.0, v5, vcc +; GFX6-NEXT: v_cndmask_b32_e32 v6, 0, v5, vcc ; GFX6-NEXT: v_cmp_lt_f32_e32 vcc, v0, v2 ; GFX6-NEXT: v_cndmask_b32_e32 v2, 0, v4, vcc ; GFX6-NEXT: v_exp_f32_e32 v1, v1 ; GFX6-NEXT: 
v_add_f32_e32 v0, v0, v2 ; GFX6-NEXT: v_exp_f32_e32 v2, v0 -; GFX6-NEXT: v_mul_f32_e32 v0, v1, v6 -; GFX6-NEXT: v_cndmask_b32_e32 v1, 1.0, v5, vcc -; GFX6-NEXT: v_mul_f32_e32 v1, v2, v1 +; GFX6-NEXT: v_ldexp_f32_e32 v0, v1, v6 +; GFX6-NEXT: v_cndmask_b32_e32 v1, 0, v5, vcc +; GFX6-NEXT: v_ldexp_f32_e32 v1, v2, v1 ; GFX6-NEXT: v_cvt_f16_f32_e32 v0, v0 ; GFX6-NEXT: v_cvt_f16_f32_e32 v1, v1 ; GFX6-NEXT: s_setpc_b64 s[30:31] @@ -634,17 +650,17 @@ define <2 x half> @v_pow_v2f16_fneg_rhs(<2 x half> %x, <2 x half> %y) { ; GFX6-NEXT: v_cmp_lt_f32_e32 vcc, v0, v3 ; GFX6-NEXT: v_cndmask_b32_e32 v5, 0, v4, vcc ; GFX6-NEXT: v_add_f32_e32 v0, v0, v5 -; GFX6-NEXT: v_mov_b32_e32 v5, 0x1f800000 +; GFX6-NEXT: v_not_b32_e32 v5, 63 ; GFX6-NEXT: v_mul_legacy_f32_e32 v1, v1, v2 -; GFX6-NEXT: v_cndmask_b32_e32 v6, 1.0, v5, vcc +; GFX6-NEXT: v_cndmask_b32_e32 v6, 0, v5, vcc ; GFX6-NEXT: v_cmp_lt_f32_e32 vcc, v1, v3 ; GFX6-NEXT: v_cndmask_b32_e32 v2, 0, v4, vcc ; GFX6-NEXT: v_add_f32_e32 v1, v1, v2 ; GFX6-NEXT: v_exp_f32_e32 v0, v0 ; GFX6-NEXT: v_exp_f32_e32 v1, v1 -; GFX6-NEXT: v_cndmask_b32_e32 v2, 1.0, v5, vcc -; GFX6-NEXT: v_mul_f32_e32 v0, v0, v6 -; GFX6-NEXT: v_mul_f32_e32 v1, v1, v2 +; GFX6-NEXT: v_cndmask_b32_e32 v2, 0, v5, vcc +; GFX6-NEXT: v_ldexp_f32_e32 v0, v0, v6 +; GFX6-NEXT: v_ldexp_f32_e32 v1, v1, v2 ; GFX6-NEXT: v_cvt_f16_f32_e32 v0, v0 ; GFX6-NEXT: v_cvt_f16_f32_e32 v1, v1 ; GFX6-NEXT: s_setpc_b64 s[30:31] @@ -764,17 +780,17 @@ define <2 x half> @v_pow_v2f16_fneg_lhs_rhs(<2 x half> %x, <2 x half> %y) { ; GFX6-NEXT: v_cmp_lt_f32_e32 vcc, v2, v3 ; GFX6-NEXT: v_cndmask_b32_e32 v5, 0, v4, vcc ; GFX6-NEXT: v_add_f32_e32 v2, v2, v5 -; GFX6-NEXT: v_mov_b32_e32 v5, 0x1f800000 +; GFX6-NEXT: v_not_b32_e32 v5, 63 ; GFX6-NEXT: v_mul_legacy_f32_e32 v0, v0, v1 -; GFX6-NEXT: v_cndmask_b32_e32 v6, 1.0, v5, vcc +; GFX6-NEXT: v_cndmask_b32_e32 v6, 0, v5, vcc ; GFX6-NEXT: v_cmp_lt_f32_e32 vcc, v0, v3 ; GFX6-NEXT: v_cndmask_b32_e32 v1, 0, v4, vcc ; GFX6-NEXT: v_exp_f32_e32 v2, v2 ; 
GFX6-NEXT: v_add_f32_e32 v0, v0, v1 ; GFX6-NEXT: v_exp_f32_e32 v1, v0 -; GFX6-NEXT: v_mul_f32_e32 v0, v2, v6 -; GFX6-NEXT: v_cndmask_b32_e32 v2, 1.0, v5, vcc -; GFX6-NEXT: v_mul_f32_e32 v1, v1, v2 +; GFX6-NEXT: v_ldexp_f32_e32 v0, v2, v6 +; GFX6-NEXT: v_cndmask_b32_e32 v2, 0, v5, vcc +; GFX6-NEXT: v_ldexp_f32_e32 v1, v1, v2 ; GFX6-NEXT: v_cvt_f16_f32_e32 v0, v0 ; GFX6-NEXT: v_cvt_f16_f32_e32 v1, v1 ; GFX6-NEXT: s_setpc_b64 s[30:31] @@ -885,10 +901,10 @@ define float @v_pow_f32_fabs_lhs(float %x, float %y) { ; GFX6: ; %bb.0: ; GFX6-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) ; GFX6-NEXT: v_mov_b32_e32 v2, 0x800000 -; GFX6-NEXT: v_mov_b32_e32 v3, 0x4f800000 ; GFX6-NEXT: v_cmp_lt_f32_e64 vcc, |v0|, v2 -; GFX6-NEXT: v_cndmask_b32_e32 v2, 1.0, v3, vcc -; GFX6-NEXT: v_mul_f32_e64 v0, |v0|, v2 +; GFX6-NEXT: v_cndmask_b32_e64 v2, 0, 1, vcc +; GFX6-NEXT: v_lshlrev_b32_e32 v2, 5, v2 +; GFX6-NEXT: v_ldexp_f32_e64 v0, |v0|, v2 ; GFX6-NEXT: v_log_f32_e32 v0, v0 ; GFX6-NEXT: v_mov_b32_e32 v2, 0x42000000 ; GFX6-NEXT: v_cndmask_b32_e32 v2, 0, v2, vcc @@ -900,19 +916,19 @@ define float @v_pow_f32_fabs_lhs(float %x, float %y) { ; GFX6-NEXT: v_cndmask_b32_e32 v1, 0, v2, vcc ; GFX6-NEXT: v_add_f32_e32 v0, v0, v1 ; GFX6-NEXT: v_exp_f32_e32 v0, v0 -; GFX6-NEXT: v_mov_b32_e32 v1, 0x1f800000 -; GFX6-NEXT: v_cndmask_b32_e32 v1, 1.0, v1, vcc -; GFX6-NEXT: v_mul_f32_e32 v0, v0, v1 +; GFX6-NEXT: v_not_b32_e32 v1, 63 +; GFX6-NEXT: v_cndmask_b32_e32 v1, 0, v1, vcc +; GFX6-NEXT: v_ldexp_f32_e32 v0, v0, v1 ; GFX6-NEXT: s_setpc_b64 s[30:31] ; ; GFX8-LABEL: v_pow_f32_fabs_lhs: ; GFX8: ; %bb.0: ; GFX8-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) ; GFX8-NEXT: v_mov_b32_e32 v2, 0x800000 -; GFX8-NEXT: v_mov_b32_e32 v3, 0x4f800000 ; GFX8-NEXT: v_cmp_lt_f32_e64 vcc, |v0|, v2 -; GFX8-NEXT: v_cndmask_b32_e32 v2, 1.0, v3, vcc -; GFX8-NEXT: v_mul_f32_e64 v0, |v0|, v2 +; GFX8-NEXT: v_cndmask_b32_e64 v2, 0, 1, vcc +; GFX8-NEXT: v_lshlrev_b32_e32 v2, 5, v2 +; GFX8-NEXT: v_ldexp_f32 v0, |v0|, v2 ; GFX8-NEXT: 
v_log_f32_e32 v0, v0 ; GFX8-NEXT: v_mov_b32_e32 v2, 0x42000000 ; GFX8-NEXT: v_cndmask_b32_e32 v2, 0, v2, vcc @@ -924,19 +940,19 @@ define float @v_pow_f32_fabs_lhs(float %x, float %y) { ; GFX8-NEXT: v_cndmask_b32_e32 v1, 0, v2, vcc ; GFX8-NEXT: v_add_f32_e32 v0, v0, v1 ; GFX8-NEXT: v_exp_f32_e32 v0, v0 -; GFX8-NEXT: v_mov_b32_e32 v1, 0x1f800000 -; GFX8-NEXT: v_cndmask_b32_e32 v1, 1.0, v1, vcc -; GFX8-NEXT: v_mul_f32_e32 v0, v0, v1 +; GFX8-NEXT: v_not_b32_e32 v1, 63 +; GFX8-NEXT: v_cndmask_b32_e32 v1, 0, v1, vcc +; GFX8-NEXT: v_ldexp_f32 v0, v0, v1 ; GFX8-NEXT: s_setpc_b64 s[30:31] ; ; GFX9-LABEL: v_pow_f32_fabs_lhs: ; GFX9: ; %bb.0: ; GFX9-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) ; GFX9-NEXT: v_mov_b32_e32 v2, 0x800000 -; GFX9-NEXT: v_mov_b32_e32 v3, 0x4f800000 ; GFX9-NEXT: v_cmp_lt_f32_e64 vcc, |v0|, v2 -; GFX9-NEXT: v_cndmask_b32_e32 v2, 1.0, v3, vcc -; GFX9-NEXT: v_mul_f32_e64 v0, |v0|, v2 +; GFX9-NEXT: v_cndmask_b32_e64 v2, 0, 1, vcc +; GFX9-NEXT: v_lshlrev_b32_e32 v2, 5, v2 +; GFX9-NEXT: v_ldexp_f32 v0, |v0|, v2 ; GFX9-NEXT: v_log_f32_e32 v0, v0 ; GFX9-NEXT: v_mov_b32_e32 v2, 0x42000000 ; GFX9-NEXT: v_cndmask_b32_e32 v2, 0, v2, vcc @@ -948,17 +964,18 @@ define float @v_pow_f32_fabs_lhs(float %x, float %y) { ; GFX9-NEXT: v_cndmask_b32_e32 v1, 0, v2, vcc ; GFX9-NEXT: v_add_f32_e32 v0, v0, v1 ; GFX9-NEXT: v_exp_f32_e32 v0, v0 -; GFX9-NEXT: v_mov_b32_e32 v1, 0x1f800000 -; GFX9-NEXT: v_cndmask_b32_e32 v1, 1.0, v1, vcc -; GFX9-NEXT: v_mul_f32_e32 v0, v0, v1 +; GFX9-NEXT: v_not_b32_e32 v1, 63 +; GFX9-NEXT: v_cndmask_b32_e32 v1, 0, v1, vcc +; GFX9-NEXT: v_ldexp_f32 v0, v0, v1 ; GFX9-NEXT: s_setpc_b64 s[30:31] ; ; GFX10-LABEL: v_pow_f32_fabs_lhs: ; GFX10: ; %bb.0: ; GFX10-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) ; GFX10-NEXT: v_cmp_gt_f32_e64 s4, 0x800000, |v0| -; GFX10-NEXT: v_cndmask_b32_e64 v2, 1.0, 0x4f800000, s4 -; GFX10-NEXT: v_mul_f32_e64 v0, |v0|, v2 +; GFX10-NEXT: v_cndmask_b32_e64 v2, 0, 1, s4 +; GFX10-NEXT: v_lshlrev_b32_e32 v2, 5, v2 +; 
GFX10-NEXT: v_ldexp_f32 v0, |v0|, v2 ; GFX10-NEXT: v_cndmask_b32_e64 v2, 0, 0x42000000, s4 ; GFX10-NEXT: v_log_f32_e32 v0, v0 ; GFX10-NEXT: v_sub_f32_e32 v0, v0, v2 @@ -966,9 +983,9 @@ define float @v_pow_f32_fabs_lhs(float %x, float %y) { ; GFX10-NEXT: v_cmp_gt_f32_e32 vcc_lo, 0xc2fc0000, v0 ; GFX10-NEXT: v_cndmask_b32_e64 v1, 0, 0x42800000, vcc_lo ; GFX10-NEXT: v_add_f32_e32 v0, v0, v1 -; GFX10-NEXT: v_cndmask_b32_e64 v1, 1.0, 0x1f800000, vcc_lo +; GFX10-NEXT: v_cndmask_b32_e64 v1, 0, 0xffffffc0, vcc_lo ; GFX10-NEXT: v_exp_f32_e32 v0, v0 -; GFX10-NEXT: v_mul_f32_e32 v0, v0, v1 +; GFX10-NEXT: v_ldexp_f32 v0, v0, v1 ; GFX10-NEXT: s_setpc_b64 s[30:31] ; ; GFX11-LABEL: v_pow_f32_fabs_lhs: @@ -976,23 +993,24 @@ define float @v_pow_f32_fabs_lhs(float %x, float %y) { ; GFX11-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) ; GFX11-NEXT: v_cmp_gt_f32_e64 s0, 0x800000, |v0| ; GFX11-NEXT: s_delay_alu instid0(VALU_DEP_1) | instskip(NEXT) | instid1(VALU_DEP_1) -; GFX11-NEXT: v_cndmask_b32_e64 v2, 1.0, 0x4f800000, s0 -; GFX11-NEXT: v_mul_f32_e64 v0, |v0|, v2 +; GFX11-NEXT: v_cndmask_b32_e64 v2, 0, 1, s0 +; GFX11-NEXT: v_lshlrev_b32_e32 v2, 5, v2 +; GFX11-NEXT: s_delay_alu instid0(VALU_DEP_1) | instskip(SKIP_1) | instid1(VALU_DEP_2) +; GFX11-NEXT: v_ldexp_f32 v0, |v0|, v2 ; GFX11-NEXT: v_cndmask_b32_e64 v2, 0, 0x42000000, s0 -; GFX11-NEXT: s_delay_alu instid0(VALU_DEP_2) | instskip(SKIP_2) | instid1(VALU_DEP_1) ; GFX11-NEXT: v_log_f32_e32 v0, v0 ; GFX11-NEXT: s_waitcnt_depctr 0xfff ; GFX11-NEXT: v_sub_f32_e32 v0, v0, v2 +; GFX11-NEXT: s_delay_alu instid0(VALU_DEP_1) | instskip(NEXT) | instid1(VALU_DEP_1) ; GFX11-NEXT: v_mul_dx9_zero_f32_e32 v0, v0, v1 -; GFX11-NEXT: s_delay_alu instid0(VALU_DEP_1) | instskip(SKIP_1) | instid1(VALU_DEP_1) ; GFX11-NEXT: v_cmp_gt_f32_e32 vcc_lo, 0xc2fc0000, v0 ; GFX11-NEXT: v_cndmask_b32_e64 v1, 0, 0x42800000, vcc_lo +; GFX11-NEXT: s_delay_alu instid0(VALU_DEP_1) | instskip(SKIP_1) | instid1(VALU_DEP_2) ; GFX11-NEXT: v_add_f32_e32 v0, v0, v1 -; 
GFX11-NEXT: v_cndmask_b32_e64 v1, 1.0, 0x1f800000, vcc_lo -; GFX11-NEXT: s_delay_alu instid0(VALU_DEP_2) +; GFX11-NEXT: v_cndmask_b32_e64 v1, 0, 0xffffffc0, vcc_lo ; GFX11-NEXT: v_exp_f32_e32 v0, v0 ; GFX11-NEXT: s_waitcnt_depctr 0xfff -; GFX11-NEXT: v_mul_f32_e32 v0, v0, v1 +; GFX11-NEXT: v_ldexp_f32 v0, v0, v1 ; GFX11-NEXT: s_setpc_b64 s[30:31] %fabs.x = call float @llvm.fabs.f32(float %x) %pow = call float @llvm.pow.f32(float %fabs.x, float %y) @@ -1004,10 +1022,10 @@ define float @v_pow_f32_fabs_rhs(float %x, float %y) { ; GFX6: ; %bb.0: ; GFX6-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) ; GFX6-NEXT: v_mov_b32_e32 v2, 0x800000 -; GFX6-NEXT: v_mov_b32_e32 v3, 0x4f800000 ; GFX6-NEXT: v_cmp_lt_f32_e32 vcc, v0, v2 -; GFX6-NEXT: v_cndmask_b32_e32 v2, 1.0, v3, vcc -; GFX6-NEXT: v_mul_f32_e32 v0, v0, v2 +; GFX6-NEXT: v_cndmask_b32_e64 v2, 0, 1, vcc +; GFX6-NEXT: v_lshlrev_b32_e32 v2, 5, v2 +; GFX6-NEXT: v_ldexp_f32_e32 v0, v0, v2 ; GFX6-NEXT: v_log_f32_e32 v0, v0 ; GFX6-NEXT: v_mov_b32_e32 v2, 0x42000000 ; GFX6-NEXT: v_cndmask_b32_e32 v2, 0, v2, vcc @@ -1019,19 +1037,19 @@ define float @v_pow_f32_fabs_rhs(float %x, float %y) { ; GFX6-NEXT: v_cndmask_b32_e32 v1, 0, v2, vcc ; GFX6-NEXT: v_add_f32_e32 v0, v0, v1 ; GFX6-NEXT: v_exp_f32_e32 v0, v0 -; GFX6-NEXT: v_mov_b32_e32 v1, 0x1f800000 -; GFX6-NEXT: v_cndmask_b32_e32 v1, 1.0, v1, vcc -; GFX6-NEXT: v_mul_f32_e32 v0, v0, v1 +; GFX6-NEXT: v_not_b32_e32 v1, 63 +; GFX6-NEXT: v_cndmask_b32_e32 v1, 0, v1, vcc +; GFX6-NEXT: v_ldexp_f32_e32 v0, v0, v1 ; GFX6-NEXT: s_setpc_b64 s[30:31] ; ; GFX8-LABEL: v_pow_f32_fabs_rhs: ; GFX8: ; %bb.0: ; GFX8-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) ; GFX8-NEXT: v_mov_b32_e32 v2, 0x800000 -; GFX8-NEXT: v_mov_b32_e32 v3, 0x4f800000 ; GFX8-NEXT: v_cmp_lt_f32_e32 vcc, v0, v2 -; GFX8-NEXT: v_cndmask_b32_e32 v2, 1.0, v3, vcc -; GFX8-NEXT: v_mul_f32_e32 v0, v0, v2 +; GFX8-NEXT: v_cndmask_b32_e64 v2, 0, 1, vcc +; GFX8-NEXT: v_lshlrev_b32_e32 v2, 5, v2 +; GFX8-NEXT: v_ldexp_f32 v0, v0, v2 ; 
GFX8-NEXT: v_log_f32_e32 v0, v0 ; GFX8-NEXT: v_mov_b32_e32 v2, 0x42000000 ; GFX8-NEXT: v_cndmask_b32_e32 v2, 0, v2, vcc @@ -1043,19 +1061,19 @@ define float @v_pow_f32_fabs_rhs(float %x, float %y) { ; GFX8-NEXT: v_cndmask_b32_e32 v1, 0, v2, vcc ; GFX8-NEXT: v_add_f32_e32 v0, v0, v1 ; GFX8-NEXT: v_exp_f32_e32 v0, v0 -; GFX8-NEXT: v_mov_b32_e32 v1, 0x1f800000 -; GFX8-NEXT: v_cndmask_b32_e32 v1, 1.0, v1, vcc -; GFX8-NEXT: v_mul_f32_e32 v0, v0, v1 +; GFX8-NEXT: v_not_b32_e32 v1, 63 +; GFX8-NEXT: v_cndmask_b32_e32 v1, 0, v1, vcc +; GFX8-NEXT: v_ldexp_f32 v0, v0, v1 ; GFX8-NEXT: s_setpc_b64 s[30:31] ; ; GFX9-LABEL: v_pow_f32_fabs_rhs: ; GFX9: ; %bb.0: ; GFX9-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) ; GFX9-NEXT: v_mov_b32_e32 v2, 0x800000 -; GFX9-NEXT: v_mov_b32_e32 v3, 0x4f800000 ; GFX9-NEXT: v_cmp_lt_f32_e32 vcc, v0, v2 -; GFX9-NEXT: v_cndmask_b32_e32 v2, 1.0, v3, vcc -; GFX9-NEXT: v_mul_f32_e32 v0, v0, v2 +; GFX9-NEXT: v_cndmask_b32_e64 v2, 0, 1, vcc +; GFX9-NEXT: v_lshlrev_b32_e32 v2, 5, v2 +; GFX9-NEXT: v_ldexp_f32 v0, v0, v2 ; GFX9-NEXT: v_log_f32_e32 v0, v0 ; GFX9-NEXT: v_mov_b32_e32 v2, 0x42000000 ; GFX9-NEXT: v_cndmask_b32_e32 v2, 0, v2, vcc @@ -1067,17 +1085,18 @@ define float @v_pow_f32_fabs_rhs(float %x, float %y) { ; GFX9-NEXT: v_cndmask_b32_e32 v1, 0, v2, vcc ; GFX9-NEXT: v_add_f32_e32 v0, v0, v1 ; GFX9-NEXT: v_exp_f32_e32 v0, v0 -; GFX9-NEXT: v_mov_b32_e32 v1, 0x1f800000 -; GFX9-NEXT: v_cndmask_b32_e32 v1, 1.0, v1, vcc -; GFX9-NEXT: v_mul_f32_e32 v0, v0, v1 +; GFX9-NEXT: v_not_b32_e32 v1, 63 +; GFX9-NEXT: v_cndmask_b32_e32 v1, 0, v1, vcc +; GFX9-NEXT: v_ldexp_f32 v0, v0, v1 ; GFX9-NEXT: s_setpc_b64 s[30:31] ; ; GFX10-LABEL: v_pow_f32_fabs_rhs: ; GFX10: ; %bb.0: ; GFX10-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) ; GFX10-NEXT: v_cmp_gt_f32_e32 vcc_lo, 0x800000, v0 -; GFX10-NEXT: v_cndmask_b32_e64 v2, 1.0, 0x4f800000, vcc_lo -; GFX10-NEXT: v_mul_f32_e32 v0, v0, v2 +; GFX10-NEXT: v_cndmask_b32_e64 v2, 0, 1, vcc_lo +; GFX10-NEXT: v_lshlrev_b32_e32 v2, 5, 
v2 +; GFX10-NEXT: v_ldexp_f32 v0, v0, v2 ; GFX10-NEXT: v_cndmask_b32_e64 v2, 0, 0x42000000, vcc_lo ; GFX10-NEXT: v_log_f32_e32 v0, v0 ; GFX10-NEXT: v_sub_f32_e32 v0, v0, v2 @@ -1085,32 +1104,34 @@ define float @v_pow_f32_fabs_rhs(float %x, float %y) { ; GFX10-NEXT: v_cmp_gt_f32_e32 vcc_lo, 0xc2fc0000, v0 ; GFX10-NEXT: v_cndmask_b32_e64 v1, 0, 0x42800000, vcc_lo ; GFX10-NEXT: v_add_f32_e32 v0, v0, v1 -; GFX10-NEXT: v_cndmask_b32_e64 v1, 1.0, 0x1f800000, vcc_lo +; GFX10-NEXT: v_cndmask_b32_e64 v1, 0, 0xffffffc0, vcc_lo ; GFX10-NEXT: v_exp_f32_e32 v0, v0 -; GFX10-NEXT: v_mul_f32_e32 v0, v0, v1 +; GFX10-NEXT: v_ldexp_f32 v0, v0, v1 ; GFX10-NEXT: s_setpc_b64 s[30:31] ; ; GFX11-LABEL: v_pow_f32_fabs_rhs: ; GFX11: ; %bb.0: ; GFX11-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) ; GFX11-NEXT: v_cmp_gt_f32_e32 vcc_lo, 0x800000, v0 -; GFX11-NEXT: v_cndmask_b32_e64 v2, 1.0, 0x4f800000, vcc_lo -; GFX11-NEXT: s_delay_alu instid0(VALU_DEP_1) | instskip(SKIP_1) | instid1(VALU_DEP_2) -; GFX11-NEXT: v_mul_f32_e32 v0, v0, v2 +; GFX11-NEXT: v_cndmask_b32_e64 v2, 0, 1, vcc_lo +; GFX11-NEXT: s_delay_alu instid0(VALU_DEP_1) | instskip(NEXT) | instid1(VALU_DEP_1) +; GFX11-NEXT: v_lshlrev_b32_e32 v2, 5, v2 +; GFX11-NEXT: v_ldexp_f32 v0, v0, v2 ; GFX11-NEXT: v_cndmask_b32_e64 v2, 0, 0x42000000, vcc_lo +; GFX11-NEXT: s_delay_alu instid0(VALU_DEP_2) | instskip(SKIP_2) | instid1(VALU_DEP_1) ; GFX11-NEXT: v_log_f32_e32 v0, v0 ; GFX11-NEXT: s_waitcnt_depctr 0xfff ; GFX11-NEXT: v_sub_f32_e32 v0, v0, v2 -; GFX11-NEXT: s_delay_alu instid0(VALU_DEP_1) | instskip(NEXT) | instid1(VALU_DEP_1) ; GFX11-NEXT: v_mul_dx9_zero_f32_e64 v0, v0, |v1| +; GFX11-NEXT: s_delay_alu instid0(VALU_DEP_1) | instskip(SKIP_1) | instid1(VALU_DEP_1) ; GFX11-NEXT: v_cmp_gt_f32_e32 vcc_lo, 0xc2fc0000, v0 ; GFX11-NEXT: v_cndmask_b32_e64 v1, 0, 0x42800000, vcc_lo -; GFX11-NEXT: s_delay_alu instid0(VALU_DEP_1) | instskip(SKIP_1) | instid1(VALU_DEP_2) ; GFX11-NEXT: v_add_f32_e32 v0, v0, v1 -; GFX11-NEXT: v_cndmask_b32_e64 v1, 1.0, 
0x1f800000, vcc_lo +; GFX11-NEXT: v_cndmask_b32_e64 v1, 0, 0xffffffc0, vcc_lo +; GFX11-NEXT: s_delay_alu instid0(VALU_DEP_2) ; GFX11-NEXT: v_exp_f32_e32 v0, v0 ; GFX11-NEXT: s_waitcnt_depctr 0xfff -; GFX11-NEXT: v_mul_f32_e32 v0, v0, v1 +; GFX11-NEXT: v_ldexp_f32 v0, v0, v1 ; GFX11-NEXT: s_setpc_b64 s[30:31] %fabs.y = call float @llvm.fabs.f32(float %y) %pow = call float @llvm.pow.f32(float %x, float %fabs.y) @@ -1122,10 +1143,10 @@ define float @v_pow_f32_fabs_lhs_rhs(float %x, float %y) { ; GFX6: ; %bb.0: ; GFX6-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) ; GFX6-NEXT: v_mov_b32_e32 v2, 0x800000 -; GFX6-NEXT: v_mov_b32_e32 v3, 0x4f800000 ; GFX6-NEXT: v_cmp_lt_f32_e64 vcc, |v0|, v2 -; GFX6-NEXT: v_cndmask_b32_e32 v2, 1.0, v3, vcc -; GFX6-NEXT: v_mul_f32_e64 v0, |v0|, v2 +; GFX6-NEXT: v_cndmask_b32_e64 v2, 0, 1, vcc +; GFX6-NEXT: v_lshlrev_b32_e32 v2, 5, v2 +; GFX6-NEXT: v_ldexp_f32_e64 v0, |v0|, v2 ; GFX6-NEXT: v_log_f32_e32 v0, v0 ; GFX6-NEXT: v_mov_b32_e32 v2, 0x42000000 ; GFX6-NEXT: v_cndmask_b32_e32 v2, 0, v2, vcc @@ -1137,19 +1158,19 @@ define float @v_pow_f32_fabs_lhs_rhs(float %x, float %y) { ; GFX6-NEXT: v_cndmask_b32_e32 v1, 0, v2, vcc ; GFX6-NEXT: v_add_f32_e32 v0, v0, v1 ; GFX6-NEXT: v_exp_f32_e32 v0, v0 -; GFX6-NEXT: v_mov_b32_e32 v1, 0x1f800000 -; GFX6-NEXT: v_cndmask_b32_e32 v1, 1.0, v1, vcc -; GFX6-NEXT: v_mul_f32_e32 v0, v0, v1 +; GFX6-NEXT: v_not_b32_e32 v1, 63 +; GFX6-NEXT: v_cndmask_b32_e32 v1, 0, v1, vcc +; GFX6-NEXT: v_ldexp_f32_e32 v0, v0, v1 ; GFX6-NEXT: s_setpc_b64 s[30:31] ; ; GFX8-LABEL: v_pow_f32_fabs_lhs_rhs: ; GFX8: ; %bb.0: ; GFX8-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) ; GFX8-NEXT: v_mov_b32_e32 v2, 0x800000 -; GFX8-NEXT: v_mov_b32_e32 v3, 0x4f800000 ; GFX8-NEXT: v_cmp_lt_f32_e64 vcc, |v0|, v2 -; GFX8-NEXT: v_cndmask_b32_e32 v2, 1.0, v3, vcc -; GFX8-NEXT: v_mul_f32_e64 v0, |v0|, v2 +; GFX8-NEXT: v_cndmask_b32_e64 v2, 0, 1, vcc +; GFX8-NEXT: v_lshlrev_b32_e32 v2, 5, v2 +; GFX8-NEXT: v_ldexp_f32 v0, |v0|, v2 ; GFX8-NEXT: 
v_log_f32_e32 v0, v0 ; GFX8-NEXT: v_mov_b32_e32 v2, 0x42000000 ; GFX8-NEXT: v_cndmask_b32_e32 v2, 0, v2, vcc @@ -1161,19 +1182,19 @@ define float @v_pow_f32_fabs_lhs_rhs(float %x, float %y) { ; GFX8-NEXT: v_cndmask_b32_e32 v1, 0, v2, vcc ; GFX8-NEXT: v_add_f32_e32 v0, v0, v1 ; GFX8-NEXT: v_exp_f32_e32 v0, v0 -; GFX8-NEXT: v_mov_b32_e32 v1, 0x1f800000 -; GFX8-NEXT: v_cndmask_b32_e32 v1, 1.0, v1, vcc -; GFX8-NEXT: v_mul_f32_e32 v0, v0, v1 +; GFX8-NEXT: v_not_b32_e32 v1, 63 +; GFX8-NEXT: v_cndmask_b32_e32 v1, 0, v1, vcc +; GFX8-NEXT: v_ldexp_f32 v0, v0, v1 ; GFX8-NEXT: s_setpc_b64 s[30:31] ; ; GFX9-LABEL: v_pow_f32_fabs_lhs_rhs: ; GFX9: ; %bb.0: ; GFX9-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) ; GFX9-NEXT: v_mov_b32_e32 v2, 0x800000 -; GFX9-NEXT: v_mov_b32_e32 v3, 0x4f800000 ; GFX9-NEXT: v_cmp_lt_f32_e64 vcc, |v0|, v2 -; GFX9-NEXT: v_cndmask_b32_e32 v2, 1.0, v3, vcc -; GFX9-NEXT: v_mul_f32_e64 v0, |v0|, v2 +; GFX9-NEXT: v_cndmask_b32_e64 v2, 0, 1, vcc +; GFX9-NEXT: v_lshlrev_b32_e32 v2, 5, v2 +; GFX9-NEXT: v_ldexp_f32 v0, |v0|, v2 ; GFX9-NEXT: v_log_f32_e32 v0, v0 ; GFX9-NEXT: v_mov_b32_e32 v2, 0x42000000 ; GFX9-NEXT: v_cndmask_b32_e32 v2, 0, v2, vcc @@ -1185,17 +1206,18 @@ define float @v_pow_f32_fabs_lhs_rhs(float %x, float %y) { ; GFX9-NEXT: v_cndmask_b32_e32 v1, 0, v2, vcc ; GFX9-NEXT: v_add_f32_e32 v0, v0, v1 ; GFX9-NEXT: v_exp_f32_e32 v0, v0 -; GFX9-NEXT: v_mov_b32_e32 v1, 0x1f800000 -; GFX9-NEXT: v_cndmask_b32_e32 v1, 1.0, v1, vcc -; GFX9-NEXT: v_mul_f32_e32 v0, v0, v1 +; GFX9-NEXT: v_not_b32_e32 v1, 63 +; GFX9-NEXT: v_cndmask_b32_e32 v1, 0, v1, vcc +; GFX9-NEXT: v_ldexp_f32 v0, v0, v1 ; GFX9-NEXT: s_setpc_b64 s[30:31] ; ; GFX10-LABEL: v_pow_f32_fabs_lhs_rhs: ; GFX10: ; %bb.0: ; GFX10-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) ; GFX10-NEXT: v_cmp_gt_f32_e64 s4, 0x800000, |v0| -; GFX10-NEXT: v_cndmask_b32_e64 v2, 1.0, 0x4f800000, s4 -; GFX10-NEXT: v_mul_f32_e64 v0, |v0|, v2 +; GFX10-NEXT: v_cndmask_b32_e64 v2, 0, 1, s4 +; GFX10-NEXT: v_lshlrev_b32_e32 v2, 
5, v2 +; GFX10-NEXT: v_ldexp_f32 v0, |v0|, v2 ; GFX10-NEXT: v_cndmask_b32_e64 v2, 0, 0x42000000, s4 ; GFX10-NEXT: v_log_f32_e32 v0, v0 ; GFX10-NEXT: v_sub_f32_e32 v0, v0, v2 @@ -1203,9 +1225,9 @@ define float @v_pow_f32_fabs_lhs_rhs(float %x, float %y) { ; GFX10-NEXT: v_cmp_gt_f32_e32 vcc_lo, 0xc2fc0000, v0 ; GFX10-NEXT: v_cndmask_b32_e64 v1, 0, 0x42800000, vcc_lo ; GFX10-NEXT: v_add_f32_e32 v0, v0, v1 -; GFX10-NEXT: v_cndmask_b32_e64 v1, 1.0, 0x1f800000, vcc_lo +; GFX10-NEXT: v_cndmask_b32_e64 v1, 0, 0xffffffc0, vcc_lo ; GFX10-NEXT: v_exp_f32_e32 v0, v0 -; GFX10-NEXT: v_mul_f32_e32 v0, v0, v1 +; GFX10-NEXT: v_ldexp_f32 v0, v0, v1 ; GFX10-NEXT: s_setpc_b64 s[30:31] ; ; GFX11-LABEL: v_pow_f32_fabs_lhs_rhs: @@ -1213,23 +1235,24 @@ define float @v_pow_f32_fabs_lhs_rhs(float %x, float %y) { ; GFX11-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) ; GFX11-NEXT: v_cmp_gt_f32_e64 s0, 0x800000, |v0| ; GFX11-NEXT: s_delay_alu instid0(VALU_DEP_1) | instskip(NEXT) | instid1(VALU_DEP_1) -; GFX11-NEXT: v_cndmask_b32_e64 v2, 1.0, 0x4f800000, s0 -; GFX11-NEXT: v_mul_f32_e64 v0, |v0|, v2 +; GFX11-NEXT: v_cndmask_b32_e64 v2, 0, 1, s0 +; GFX11-NEXT: v_lshlrev_b32_e32 v2, 5, v2 +; GFX11-NEXT: s_delay_alu instid0(VALU_DEP_1) | instskip(SKIP_1) | instid1(VALU_DEP_2) +; GFX11-NEXT: v_ldexp_f32 v0, |v0|, v2 ; GFX11-NEXT: v_cndmask_b32_e64 v2, 0, 0x42000000, s0 -; GFX11-NEXT: s_delay_alu instid0(VALU_DEP_2) | instskip(SKIP_2) | instid1(VALU_DEP_1) ; GFX11-NEXT: v_log_f32_e32 v0, v0 ; GFX11-NEXT: s_waitcnt_depctr 0xfff ; GFX11-NEXT: v_sub_f32_e32 v0, v0, v2 +; GFX11-NEXT: s_delay_alu instid0(VALU_DEP_1) | instskip(NEXT) | instid1(VALU_DEP_1) ; GFX11-NEXT: v_mul_dx9_zero_f32_e64 v0, v0, |v1| -; GFX11-NEXT: s_delay_alu instid0(VALU_DEP_1) | instskip(SKIP_1) | instid1(VALU_DEP_1) ; GFX11-NEXT: v_cmp_gt_f32_e32 vcc_lo, 0xc2fc0000, v0 ; GFX11-NEXT: v_cndmask_b32_e64 v1, 0, 0x42800000, vcc_lo +; GFX11-NEXT: s_delay_alu instid0(VALU_DEP_1) | instskip(SKIP_1) | instid1(VALU_DEP_2) ; GFX11-NEXT: 
v_add_f32_e32 v0, v0, v1 -; GFX11-NEXT: v_cndmask_b32_e64 v1, 1.0, 0x1f800000, vcc_lo -; GFX11-NEXT: s_delay_alu instid0(VALU_DEP_2) +; GFX11-NEXT: v_cndmask_b32_e64 v1, 0, 0xffffffc0, vcc_lo ; GFX11-NEXT: v_exp_f32_e32 v0, v0 ; GFX11-NEXT: s_waitcnt_depctr 0xfff -; GFX11-NEXT: v_mul_f32_e32 v0, v0, v1 +; GFX11-NEXT: v_ldexp_f32 v0, v0, v1 ; GFX11-NEXT: s_setpc_b64 s[30:31] %fabs.x = call float @llvm.fabs.f32(float %x) %fabs.y = call float @llvm.fabs.f32(float %y) @@ -1241,10 +1264,10 @@ define amdgpu_ps float @v_pow_f32_sgpr_vgpr(float inreg %x, float %y) { ; GFX6-LABEL: v_pow_f32_sgpr_vgpr: ; GFX6: ; %bb.0: ; GFX6-NEXT: v_mov_b32_e32 v1, 0x800000 -; GFX6-NEXT: v_mov_b32_e32 v2, 0x4f800000 ; GFX6-NEXT: v_cmp_lt_f32_e32 vcc, s0, v1 -; GFX6-NEXT: v_cndmask_b32_e32 v1, 1.0, v2, vcc -; GFX6-NEXT: v_mul_f32_e32 v1, s0, v1 +; GFX6-NEXT: v_cndmask_b32_e64 v1, 0, 1, vcc +; GFX6-NEXT: v_lshlrev_b32_e32 v1, 5, v1 +; GFX6-NEXT: v_ldexp_f32_e32 v1, s0, v1 ; GFX6-NEXT: v_log_f32_e32 v1, v1 ; GFX6-NEXT: v_mov_b32_e32 v2, 0x42000000 ; GFX6-NEXT: v_cndmask_b32_e32 v2, 0, v2, vcc @@ -1256,18 +1279,18 @@ define amdgpu_ps float @v_pow_f32_sgpr_vgpr(float inreg %x, float %y) { ; GFX6-NEXT: v_cndmask_b32_e32 v1, 0, v2, vcc ; GFX6-NEXT: v_add_f32_e32 v0, v0, v1 ; GFX6-NEXT: v_exp_f32_e32 v0, v0 -; GFX6-NEXT: v_mov_b32_e32 v1, 0x1f800000 -; GFX6-NEXT: v_cndmask_b32_e32 v1, 1.0, v1, vcc -; GFX6-NEXT: v_mul_f32_e32 v0, v0, v1 +; GFX6-NEXT: v_not_b32_e32 v1, 63 +; GFX6-NEXT: v_cndmask_b32_e32 v1, 0, v1, vcc +; GFX6-NEXT: v_ldexp_f32_e32 v0, v0, v1 ; GFX6-NEXT: ; return to shader part epilog ; ; GFX8-LABEL: v_pow_f32_sgpr_vgpr: ; GFX8: ; %bb.0: ; GFX8-NEXT: v_mov_b32_e32 v1, 0x800000 -; GFX8-NEXT: v_mov_b32_e32 v2, 0x4f800000 ; GFX8-NEXT: v_cmp_lt_f32_e32 vcc, s0, v1 -; GFX8-NEXT: v_cndmask_b32_e32 v1, 1.0, v2, vcc -; GFX8-NEXT: v_mul_f32_e32 v1, s0, v1 +; GFX8-NEXT: v_cndmask_b32_e64 v1, 0, 1, vcc +; GFX8-NEXT: v_lshlrev_b32_e32 v1, 5, v1 +; GFX8-NEXT: v_ldexp_f32 v1, s0, v1 ; GFX8-NEXT: 
v_log_f32_e32 v1, v1 ; GFX8-NEXT: v_mov_b32_e32 v2, 0x42000000 ; GFX8-NEXT: v_cndmask_b32_e32 v2, 0, v2, vcc @@ -1279,18 +1302,18 @@ define amdgpu_ps float @v_pow_f32_sgpr_vgpr(float inreg %x, float %y) { ; GFX8-NEXT: v_cndmask_b32_e32 v1, 0, v2, vcc ; GFX8-NEXT: v_add_f32_e32 v0, v0, v1 ; GFX8-NEXT: v_exp_f32_e32 v0, v0 -; GFX8-NEXT: v_mov_b32_e32 v1, 0x1f800000 -; GFX8-NEXT: v_cndmask_b32_e32 v1, 1.0, v1, vcc -; GFX8-NEXT: v_mul_f32_e32 v0, v0, v1 +; GFX8-NEXT: v_not_b32_e32 v1, 63 +; GFX8-NEXT: v_cndmask_b32_e32 v1, 0, v1, vcc +; GFX8-NEXT: v_ldexp_f32 v0, v0, v1 ; GFX8-NEXT: ; return to shader part epilog ; ; GFX9-LABEL: v_pow_f32_sgpr_vgpr: ; GFX9: ; %bb.0: ; GFX9-NEXT: v_mov_b32_e32 v1, 0x800000 -; GFX9-NEXT: v_mov_b32_e32 v2, 0x4f800000 ; GFX9-NEXT: v_cmp_lt_f32_e32 vcc, s0, v1 -; GFX9-NEXT: v_cndmask_b32_e32 v1, 1.0, v2, vcc -; GFX9-NEXT: v_mul_f32_e32 v1, s0, v1 +; GFX9-NEXT: v_cndmask_b32_e64 v1, 0, 1, vcc +; GFX9-NEXT: v_lshlrev_b32_e32 v1, 5, v1 +; GFX9-NEXT: v_ldexp_f32 v1, s0, v1 ; GFX9-NEXT: v_log_f32_e32 v1, v1 ; GFX9-NEXT: v_mov_b32_e32 v2, 0x42000000 ; GFX9-NEXT: v_cndmask_b32_e32 v2, 0, v2, vcc @@ -1302,49 +1325,51 @@ define amdgpu_ps float @v_pow_f32_sgpr_vgpr(float inreg %x, float %y) { ; GFX9-NEXT: v_cndmask_b32_e32 v1, 0, v2, vcc ; GFX9-NEXT: v_add_f32_e32 v0, v0, v1 ; GFX9-NEXT: v_exp_f32_e32 v0, v0 -; GFX9-NEXT: v_mov_b32_e32 v1, 0x1f800000 -; GFX9-NEXT: v_cndmask_b32_e32 v1, 1.0, v1, vcc -; GFX9-NEXT: v_mul_f32_e32 v0, v0, v1 +; GFX9-NEXT: v_not_b32_e32 v1, 63 +; GFX9-NEXT: v_cndmask_b32_e32 v1, 0, v1, vcc +; GFX9-NEXT: v_ldexp_f32 v0, v0, v1 ; GFX9-NEXT: ; return to shader part epilog ; ; GFX10-LABEL: v_pow_f32_sgpr_vgpr: ; GFX10: ; %bb.0: ; GFX10-NEXT: v_cmp_gt_f32_e64 s1, 0x800000, s0 -; GFX10-NEXT: v_cndmask_b32_e64 v1, 1.0, 0x4f800000, s1 +; GFX10-NEXT: v_cndmask_b32_e64 v1, 0, 1, s1 ; GFX10-NEXT: v_cndmask_b32_e64 v2, 0, 0x42000000, s1 -; GFX10-NEXT: v_mul_f32_e32 v1, s0, v1 +; GFX10-NEXT: v_lshlrev_b32_e32 v1, 5, v1 +; GFX10-NEXT: 
v_ldexp_f32 v1, s0, v1 ; GFX10-NEXT: v_log_f32_e32 v1, v1 ; GFX10-NEXT: v_sub_f32_e32 v1, v1, v2 ; GFX10-NEXT: v_mul_legacy_f32_e32 v0, v1, v0 ; GFX10-NEXT: v_cmp_gt_f32_e32 vcc_lo, 0xc2fc0000, v0 ; GFX10-NEXT: v_cndmask_b32_e64 v1, 0, 0x42800000, vcc_lo ; GFX10-NEXT: v_add_f32_e32 v0, v0, v1 -; GFX10-NEXT: v_cndmask_b32_e64 v1, 1.0, 0x1f800000, vcc_lo +; GFX10-NEXT: v_cndmask_b32_e64 v1, 0, 0xffffffc0, vcc_lo ; GFX10-NEXT: v_exp_f32_e32 v0, v0 -; GFX10-NEXT: v_mul_f32_e32 v0, v0, v1 +; GFX10-NEXT: v_ldexp_f32 v0, v0, v1 ; GFX10-NEXT: ; return to shader part epilog ; ; GFX11-LABEL: v_pow_f32_sgpr_vgpr: ; GFX11: ; %bb.0: ; GFX11-NEXT: v_cmp_gt_f32_e64 s1, 0x800000, s0 ; GFX11-NEXT: s_delay_alu instid0(VALU_DEP_1) | instskip(SKIP_1) | instid1(VALU_DEP_2) -; GFX11-NEXT: v_cndmask_b32_e64 v1, 1.0, 0x4f800000, s1 +; GFX11-NEXT: v_cndmask_b32_e64 v1, 0, 1, s1 ; GFX11-NEXT: v_cndmask_b32_e64 v2, 0, 0x42000000, s1 -; GFX11-NEXT: v_mul_f32_e32 v1, s0, v1 -; GFX11-NEXT: s_delay_alu instid0(VALU_DEP_1) | instskip(SKIP_2) | instid1(VALU_DEP_1) +; GFX11-NEXT: v_lshlrev_b32_e32 v1, 5, v1 +; GFX11-NEXT: s_delay_alu instid0(VALU_DEP_1) | instskip(NEXT) | instid1(VALU_DEP_1) +; GFX11-NEXT: v_ldexp_f32 v1, s0, v1 ; GFX11-NEXT: v_log_f32_e32 v1, v1 ; GFX11-NEXT: s_waitcnt_depctr 0xfff ; GFX11-NEXT: v_sub_f32_e32 v1, v1, v2 +; GFX11-NEXT: s_delay_alu instid0(VALU_DEP_1) | instskip(NEXT) | instid1(VALU_DEP_1) ; GFX11-NEXT: v_mul_dx9_zero_f32_e32 v0, v1, v0 -; GFX11-NEXT: s_delay_alu instid0(VALU_DEP_1) | instskip(SKIP_1) | instid1(VALU_DEP_1) ; GFX11-NEXT: v_cmp_gt_f32_e32 vcc_lo, 0xc2fc0000, v0 ; GFX11-NEXT: v_cndmask_b32_e64 v1, 0, 0x42800000, vcc_lo +; GFX11-NEXT: s_delay_alu instid0(VALU_DEP_1) | instskip(SKIP_1) | instid1(VALU_DEP_2) ; GFX11-NEXT: v_add_f32_e32 v0, v0, v1 -; GFX11-NEXT: v_cndmask_b32_e64 v1, 1.0, 0x1f800000, vcc_lo -; GFX11-NEXT: s_delay_alu instid0(VALU_DEP_2) +; GFX11-NEXT: v_cndmask_b32_e64 v1, 0, 0xffffffc0, vcc_lo ; GFX11-NEXT: v_exp_f32_e32 v0, v0 ; 
GFX11-NEXT: s_waitcnt_depctr 0xfff -; GFX11-NEXT: v_mul_f32_e32 v0, v0, v1 +; GFX11-NEXT: v_ldexp_f32 v0, v0, v1 ; GFX11-NEXT: ; return to shader part epilog %pow = call float @llvm.pow.f32(float %x, float %y) ret float %pow @@ -1354,10 +1379,10 @@ define amdgpu_ps float @v_pow_f32_vgpr_sgpr(float %x, float inreg %y) { ; GFX6-LABEL: v_pow_f32_vgpr_sgpr: ; GFX6: ; %bb.0: ; GFX6-NEXT: v_mov_b32_e32 v1, 0x800000 -; GFX6-NEXT: v_mov_b32_e32 v2, 0x4f800000 ; GFX6-NEXT: v_cmp_lt_f32_e32 vcc, v0, v1 -; GFX6-NEXT: v_cndmask_b32_e32 v1, 1.0, v2, vcc -; GFX6-NEXT: v_mul_f32_e32 v0, v0, v1 +; GFX6-NEXT: v_cndmask_b32_e64 v1, 0, 1, vcc +; GFX6-NEXT: v_lshlrev_b32_e32 v1, 5, v1 +; GFX6-NEXT: v_ldexp_f32_e32 v0, v0, v1 ; GFX6-NEXT: v_log_f32_e32 v0, v0 ; GFX6-NEXT: v_mov_b32_e32 v1, 0x42000000 ; GFX6-NEXT: v_cndmask_b32_e32 v1, 0, v1, vcc @@ -1369,18 +1394,18 @@ define amdgpu_ps float @v_pow_f32_vgpr_sgpr(float %x, float inreg %y) { ; GFX6-NEXT: v_cndmask_b32_e32 v1, 0, v2, vcc ; GFX6-NEXT: v_add_f32_e32 v0, v0, v1 ; GFX6-NEXT: v_exp_f32_e32 v0, v0 -; GFX6-NEXT: v_mov_b32_e32 v1, 0x1f800000 -; GFX6-NEXT: v_cndmask_b32_e32 v1, 1.0, v1, vcc -; GFX6-NEXT: v_mul_f32_e32 v0, v0, v1 +; GFX6-NEXT: v_not_b32_e32 v1, 63 +; GFX6-NEXT: v_cndmask_b32_e32 v1, 0, v1, vcc +; GFX6-NEXT: v_ldexp_f32_e32 v0, v0, v1 ; GFX6-NEXT: ; return to shader part epilog ; ; GFX8-LABEL: v_pow_f32_vgpr_sgpr: ; GFX8: ; %bb.0: ; GFX8-NEXT: v_mov_b32_e32 v1, 0x800000 -; GFX8-NEXT: v_mov_b32_e32 v2, 0x4f800000 ; GFX8-NEXT: v_cmp_lt_f32_e32 vcc, v0, v1 -; GFX8-NEXT: v_cndmask_b32_e32 v1, 1.0, v2, vcc -; GFX8-NEXT: v_mul_f32_e32 v0, v0, v1 +; GFX8-NEXT: v_cndmask_b32_e64 v1, 0, 1, vcc +; GFX8-NEXT: v_lshlrev_b32_e32 v1, 5, v1 +; GFX8-NEXT: v_ldexp_f32 v0, v0, v1 ; GFX8-NEXT: v_log_f32_e32 v0, v0 ; GFX8-NEXT: v_mov_b32_e32 v1, 0x42000000 ; GFX8-NEXT: v_cndmask_b32_e32 v1, 0, v1, vcc @@ -1392,18 +1417,18 @@ define amdgpu_ps float @v_pow_f32_vgpr_sgpr(float %x, float inreg %y) { ; GFX8-NEXT: v_cndmask_b32_e32 v1, 0, 
v2, vcc ; GFX8-NEXT: v_add_f32_e32 v0, v0, v1 ; GFX8-NEXT: v_exp_f32_e32 v0, v0 -; GFX8-NEXT: v_mov_b32_e32 v1, 0x1f800000 -; GFX8-NEXT: v_cndmask_b32_e32 v1, 1.0, v1, vcc -; GFX8-NEXT: v_mul_f32_e32 v0, v0, v1 +; GFX8-NEXT: v_not_b32_e32 v1, 63 +; GFX8-NEXT: v_cndmask_b32_e32 v1, 0, v1, vcc +; GFX8-NEXT: v_ldexp_f32 v0, v0, v1 ; GFX8-NEXT: ; return to shader part epilog ; ; GFX9-LABEL: v_pow_f32_vgpr_sgpr: ; GFX9: ; %bb.0: ; GFX9-NEXT: v_mov_b32_e32 v1, 0x800000 -; GFX9-NEXT: v_mov_b32_e32 v2, 0x4f800000 ; GFX9-NEXT: v_cmp_lt_f32_e32 vcc, v0, v1 -; GFX9-NEXT: v_cndmask_b32_e32 v1, 1.0, v2, vcc -; GFX9-NEXT: v_mul_f32_e32 v0, v0, v1 +; GFX9-NEXT: v_cndmask_b32_e64 v1, 0, 1, vcc +; GFX9-NEXT: v_lshlrev_b32_e32 v1, 5, v1 +; GFX9-NEXT: v_ldexp_f32 v0, v0, v1 ; GFX9-NEXT: v_log_f32_e32 v0, v0 ; GFX9-NEXT: v_mov_b32_e32 v1, 0x42000000 ; GFX9-NEXT: v_cndmask_b32_e32 v1, 0, v1, vcc @@ -1415,16 +1440,17 @@ define amdgpu_ps float @v_pow_f32_vgpr_sgpr(float %x, float inreg %y) { ; GFX9-NEXT: v_cndmask_b32_e32 v1, 0, v2, vcc ; GFX9-NEXT: v_add_f32_e32 v0, v0, v1 ; GFX9-NEXT: v_exp_f32_e32 v0, v0 -; GFX9-NEXT: v_mov_b32_e32 v1, 0x1f800000 -; GFX9-NEXT: v_cndmask_b32_e32 v1, 1.0, v1, vcc -; GFX9-NEXT: v_mul_f32_e32 v0, v0, v1 +; GFX9-NEXT: v_not_b32_e32 v1, 63 +; GFX9-NEXT: v_cndmask_b32_e32 v1, 0, v1, vcc +; GFX9-NEXT: v_ldexp_f32 v0, v0, v1 ; GFX9-NEXT: ; return to shader part epilog ; ; GFX10-LABEL: v_pow_f32_vgpr_sgpr: ; GFX10: ; %bb.0: ; GFX10-NEXT: v_cmp_gt_f32_e32 vcc_lo, 0x800000, v0 -; GFX10-NEXT: v_cndmask_b32_e64 v1, 1.0, 0x4f800000, vcc_lo -; GFX10-NEXT: v_mul_f32_e32 v0, v0, v1 +; GFX10-NEXT: v_cndmask_b32_e64 v1, 0, 1, vcc_lo +; GFX10-NEXT: v_lshlrev_b32_e32 v1, 5, v1 +; GFX10-NEXT: v_ldexp_f32 v0, v0, v1 ; GFX10-NEXT: v_cndmask_b32_e64 v1, 0, 0x42000000, vcc_lo ; GFX10-NEXT: v_log_f32_e32 v0, v0 ; GFX10-NEXT: v_sub_f32_e32 v0, v0, v1 @@ -1432,31 +1458,33 @@ define amdgpu_ps float @v_pow_f32_vgpr_sgpr(float %x, float inreg %y) { ; GFX10-NEXT: v_cmp_gt_f32_e32 
vcc_lo, 0xc2fc0000, v0 ; GFX10-NEXT: v_cndmask_b32_e64 v1, 0, 0x42800000, vcc_lo ; GFX10-NEXT: v_add_f32_e32 v0, v0, v1 -; GFX10-NEXT: v_cndmask_b32_e64 v1, 1.0, 0x1f800000, vcc_lo +; GFX10-NEXT: v_cndmask_b32_e64 v1, 0, 0xffffffc0, vcc_lo ; GFX10-NEXT: v_exp_f32_e32 v0, v0 -; GFX10-NEXT: v_mul_f32_e32 v0, v0, v1 +; GFX10-NEXT: v_ldexp_f32 v0, v0, v1 ; GFX10-NEXT: ; return to shader part epilog ; ; GFX11-LABEL: v_pow_f32_vgpr_sgpr: ; GFX11: ; %bb.0: ; GFX11-NEXT: v_cmp_gt_f32_e32 vcc_lo, 0x800000, v0 -; GFX11-NEXT: v_cndmask_b32_e64 v1, 1.0, 0x4f800000, vcc_lo -; GFX11-NEXT: s_delay_alu instid0(VALU_DEP_1) | instskip(SKIP_1) | instid1(VALU_DEP_2) -; GFX11-NEXT: v_mul_f32_e32 v0, v0, v1 +; GFX11-NEXT: v_cndmask_b32_e64 v1, 0, 1, vcc_lo +; GFX11-NEXT: s_delay_alu instid0(VALU_DEP_1) | instskip(NEXT) | instid1(VALU_DEP_1) +; GFX11-NEXT: v_lshlrev_b32_e32 v1, 5, v1 +; GFX11-NEXT: v_ldexp_f32 v0, v0, v1 ; GFX11-NEXT: v_cndmask_b32_e64 v1, 0, 0x42000000, vcc_lo +; GFX11-NEXT: s_delay_alu instid0(VALU_DEP_2) | instskip(SKIP_2) | instid1(VALU_DEP_1) ; GFX11-NEXT: v_log_f32_e32 v0, v0 ; GFX11-NEXT: s_waitcnt_depctr 0xfff ; GFX11-NEXT: v_sub_f32_e32 v0, v0, v1 -; GFX11-NEXT: s_delay_alu instid0(VALU_DEP_1) | instskip(NEXT) | instid1(VALU_DEP_1) ; GFX11-NEXT: v_mul_dx9_zero_f32_e32 v0, s0, v0 +; GFX11-NEXT: s_delay_alu instid0(VALU_DEP_1) | instskip(SKIP_1) | instid1(VALU_DEP_1) ; GFX11-NEXT: v_cmp_gt_f32_e32 vcc_lo, 0xc2fc0000, v0 ; GFX11-NEXT: v_cndmask_b32_e64 v1, 0, 0x42800000, vcc_lo -; GFX11-NEXT: s_delay_alu instid0(VALU_DEP_1) | instskip(SKIP_1) | instid1(VALU_DEP_2) ; GFX11-NEXT: v_add_f32_e32 v0, v0, v1 -; GFX11-NEXT: v_cndmask_b32_e64 v1, 1.0, 0x1f800000, vcc_lo +; GFX11-NEXT: v_cndmask_b32_e64 v1, 0, 0xffffffc0, vcc_lo +; GFX11-NEXT: s_delay_alu instid0(VALU_DEP_2) ; GFX11-NEXT: v_exp_f32_e32 v0, v0 ; GFX11-NEXT: s_waitcnt_depctr 0xfff -; GFX11-NEXT: v_mul_f32_e32 v0, v0, v1 +; GFX11-NEXT: v_ldexp_f32 v0, v0, v1 ; GFX11-NEXT: ; return to shader part epilog %pow = 
call float @llvm.pow.f32(float %x, float %y) ret float %pow @@ -1466,10 +1494,10 @@ define amdgpu_ps float @v_pow_f32_sgpr_sgpr(float inreg %x, float inreg %y) { ; GFX6-LABEL: v_pow_f32_sgpr_sgpr: ; GFX6: ; %bb.0: ; GFX6-NEXT: v_mov_b32_e32 v0, 0x800000 -; GFX6-NEXT: v_mov_b32_e32 v1, 0x4f800000 ; GFX6-NEXT: v_cmp_lt_f32_e32 vcc, s0, v0 -; GFX6-NEXT: v_cndmask_b32_e32 v0, 1.0, v1, vcc -; GFX6-NEXT: v_mul_f32_e32 v0, s0, v0 +; GFX6-NEXT: v_cndmask_b32_e64 v0, 0, 1, vcc +; GFX6-NEXT: v_lshlrev_b32_e32 v0, 5, v0 +; GFX6-NEXT: v_ldexp_f32_e32 v0, s0, v0 ; GFX6-NEXT: v_log_f32_e32 v0, v0 ; GFX6-NEXT: v_mov_b32_e32 v1, 0x42000000 ; GFX6-NEXT: v_cndmask_b32_e32 v1, 0, v1, vcc @@ -1481,18 +1509,18 @@ define amdgpu_ps float @v_pow_f32_sgpr_sgpr(float inreg %x, float inreg %y) { ; GFX6-NEXT: v_cndmask_b32_e32 v1, 0, v2, vcc ; GFX6-NEXT: v_add_f32_e32 v0, v0, v1 ; GFX6-NEXT: v_exp_f32_e32 v0, v0 -; GFX6-NEXT: v_mov_b32_e32 v1, 0x1f800000 -; GFX6-NEXT: v_cndmask_b32_e32 v1, 1.0, v1, vcc -; GFX6-NEXT: v_mul_f32_e32 v0, v0, v1 +; GFX6-NEXT: v_not_b32_e32 v1, 63 +; GFX6-NEXT: v_cndmask_b32_e32 v1, 0, v1, vcc +; GFX6-NEXT: v_ldexp_f32_e32 v0, v0, v1 ; GFX6-NEXT: ; return to shader part epilog ; ; GFX8-LABEL: v_pow_f32_sgpr_sgpr: ; GFX8: ; %bb.0: ; GFX8-NEXT: v_mov_b32_e32 v0, 0x800000 -; GFX8-NEXT: v_mov_b32_e32 v1, 0x4f800000 ; GFX8-NEXT: v_cmp_lt_f32_e32 vcc, s0, v0 -; GFX8-NEXT: v_cndmask_b32_e32 v0, 1.0, v1, vcc -; GFX8-NEXT: v_mul_f32_e32 v0, s0, v0 +; GFX8-NEXT: v_cndmask_b32_e64 v0, 0, 1, vcc +; GFX8-NEXT: v_lshlrev_b32_e32 v0, 5, v0 +; GFX8-NEXT: v_ldexp_f32 v0, s0, v0 ; GFX8-NEXT: v_log_f32_e32 v0, v0 ; GFX8-NEXT: v_mov_b32_e32 v1, 0x42000000 ; GFX8-NEXT: v_cndmask_b32_e32 v1, 0, v1, vcc @@ -1504,18 +1532,18 @@ define amdgpu_ps float @v_pow_f32_sgpr_sgpr(float inreg %x, float inreg %y) { ; GFX8-NEXT: v_cndmask_b32_e32 v1, 0, v2, vcc ; GFX8-NEXT: v_add_f32_e32 v0, v0, v1 ; GFX8-NEXT: v_exp_f32_e32 v0, v0 -; GFX8-NEXT: v_mov_b32_e32 v1, 0x1f800000 -; GFX8-NEXT: 
v_cndmask_b32_e32 v1, 1.0, v1, vcc -; GFX8-NEXT: v_mul_f32_e32 v0, v0, v1 +; GFX8-NEXT: v_not_b32_e32 v1, 63 +; GFX8-NEXT: v_cndmask_b32_e32 v1, 0, v1, vcc +; GFX8-NEXT: v_ldexp_f32 v0, v0, v1 ; GFX8-NEXT: ; return to shader part epilog ; ; GFX9-LABEL: v_pow_f32_sgpr_sgpr: ; GFX9: ; %bb.0: ; GFX9-NEXT: v_mov_b32_e32 v0, 0x800000 -; GFX9-NEXT: v_mov_b32_e32 v1, 0x4f800000 ; GFX9-NEXT: v_cmp_lt_f32_e32 vcc, s0, v0 -; GFX9-NEXT: v_cndmask_b32_e32 v0, 1.0, v1, vcc -; GFX9-NEXT: v_mul_f32_e32 v0, s0, v0 +; GFX9-NEXT: v_cndmask_b32_e64 v0, 0, 1, vcc +; GFX9-NEXT: v_lshlrev_b32_e32 v0, 5, v0 +; GFX9-NEXT: v_ldexp_f32 v0, s0, v0 ; GFX9-NEXT: v_log_f32_e32 v0, v0 ; GFX9-NEXT: v_mov_b32_e32 v1, 0x42000000 ; GFX9-NEXT: v_cndmask_b32_e32 v1, 0, v1, vcc @@ -1527,49 +1555,51 @@ define amdgpu_ps float @v_pow_f32_sgpr_sgpr(float inreg %x, float inreg %y) { ; GFX9-NEXT: v_cndmask_b32_e32 v1, 0, v2, vcc ; GFX9-NEXT: v_add_f32_e32 v0, v0, v1 ; GFX9-NEXT: v_exp_f32_e32 v0, v0 -; GFX9-NEXT: v_mov_b32_e32 v1, 0x1f800000 -; GFX9-NEXT: v_cndmask_b32_e32 v1, 1.0, v1, vcc -; GFX9-NEXT: v_mul_f32_e32 v0, v0, v1 +; GFX9-NEXT: v_not_b32_e32 v1, 63 +; GFX9-NEXT: v_cndmask_b32_e32 v1, 0, v1, vcc +; GFX9-NEXT: v_ldexp_f32 v0, v0, v1 ; GFX9-NEXT: ; return to shader part epilog ; ; GFX10-LABEL: v_pow_f32_sgpr_sgpr: ; GFX10: ; %bb.0: ; GFX10-NEXT: v_cmp_gt_f32_e64 s2, 0x800000, s0 -; GFX10-NEXT: v_cndmask_b32_e64 v0, 1.0, 0x4f800000, s2 +; GFX10-NEXT: v_cndmask_b32_e64 v0, 0, 1, s2 ; GFX10-NEXT: v_cndmask_b32_e64 v1, 0, 0x42000000, s2 -; GFX10-NEXT: v_mul_f32_e32 v0, s0, v0 +; GFX10-NEXT: v_lshlrev_b32_e32 v0, 5, v0 +; GFX10-NEXT: v_ldexp_f32 v0, s0, v0 ; GFX10-NEXT: v_log_f32_e32 v0, v0 ; GFX10-NEXT: v_sub_f32_e32 v0, v0, v1 ; GFX10-NEXT: v_mul_legacy_f32_e32 v0, s1, v0 ; GFX10-NEXT: v_cmp_gt_f32_e32 vcc_lo, 0xc2fc0000, v0 ; GFX10-NEXT: v_cndmask_b32_e64 v1, 0, 0x42800000, vcc_lo ; GFX10-NEXT: v_add_f32_e32 v0, v0, v1 -; GFX10-NEXT: v_cndmask_b32_e64 v1, 1.0, 0x1f800000, vcc_lo +; GFX10-NEXT: 
v_cndmask_b32_e64 v1, 0, 0xffffffc0, vcc_lo ; GFX10-NEXT: v_exp_f32_e32 v0, v0 -; GFX10-NEXT: v_mul_f32_e32 v0, v0, v1 +; GFX10-NEXT: v_ldexp_f32 v0, v0, v1 ; GFX10-NEXT: ; return to shader part epilog ; ; GFX11-LABEL: v_pow_f32_sgpr_sgpr: ; GFX11: ; %bb.0: ; GFX11-NEXT: v_cmp_gt_f32_e64 s2, 0x800000, s0 ; GFX11-NEXT: s_delay_alu instid0(VALU_DEP_1) | instskip(SKIP_1) | instid1(VALU_DEP_2) -; GFX11-NEXT: v_cndmask_b32_e64 v0, 1.0, 0x4f800000, s2 +; GFX11-NEXT: v_cndmask_b32_e64 v0, 0, 1, s2 ; GFX11-NEXT: v_cndmask_b32_e64 v1, 0, 0x42000000, s2 -; GFX11-NEXT: v_mul_f32_e32 v0, s0, v0 -; GFX11-NEXT: s_delay_alu instid0(VALU_DEP_1) | instskip(SKIP_2) | instid1(VALU_DEP_1) +; GFX11-NEXT: v_lshlrev_b32_e32 v0, 5, v0 +; GFX11-NEXT: s_delay_alu instid0(VALU_DEP_1) | instskip(NEXT) | instid1(VALU_DEP_1) +; GFX11-NEXT: v_ldexp_f32 v0, s0, v0 ; GFX11-NEXT: v_log_f32_e32 v0, v0 ; GFX11-NEXT: s_waitcnt_depctr 0xfff ; GFX11-NEXT: v_sub_f32_e32 v0, v0, v1 +; GFX11-NEXT: s_delay_alu instid0(VALU_DEP_1) | instskip(NEXT) | instid1(VALU_DEP_1) ; GFX11-NEXT: v_mul_dx9_zero_f32_e32 v0, s1, v0 -; GFX11-NEXT: s_delay_alu instid0(VALU_DEP_1) | instskip(SKIP_1) | instid1(VALU_DEP_1) ; GFX11-NEXT: v_cmp_gt_f32_e32 vcc_lo, 0xc2fc0000, v0 ; GFX11-NEXT: v_cndmask_b32_e64 v1, 0, 0x42800000, vcc_lo +; GFX11-NEXT: s_delay_alu instid0(VALU_DEP_1) | instskip(SKIP_1) | instid1(VALU_DEP_2) ; GFX11-NEXT: v_add_f32_e32 v0, v0, v1 -; GFX11-NEXT: v_cndmask_b32_e64 v1, 1.0, 0x1f800000, vcc_lo -; GFX11-NEXT: s_delay_alu instid0(VALU_DEP_2) +; GFX11-NEXT: v_cndmask_b32_e64 v1, 0, 0xffffffc0, vcc_lo ; GFX11-NEXT: v_exp_f32_e32 v0, v0 ; GFX11-NEXT: s_waitcnt_depctr 0xfff -; GFX11-NEXT: v_mul_f32_e32 v0, v0, v1 +; GFX11-NEXT: v_ldexp_f32 v0, v0, v1 ; GFX11-NEXT: ; return to shader part epilog %pow = call float @llvm.pow.f32(float %x, float %y) ret float %pow @@ -1580,10 +1610,10 @@ define float @v_pow_f32_fneg_lhs(float %x, float %y) { ; GFX6: ; %bb.0: ; GFX6-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) ; 
GFX6-NEXT: v_mov_b32_e32 v2, 0x800000 -; GFX6-NEXT: v_mov_b32_e32 v3, 0x4f800000 ; GFX6-NEXT: v_cmp_lt_f32_e64 vcc, -v0, v2 -; GFX6-NEXT: v_cndmask_b32_e32 v2, 1.0, v3, vcc -; GFX6-NEXT: v_mul_f32_e64 v0, -v0, v2 +; GFX6-NEXT: v_cndmask_b32_e64 v2, 0, 1, vcc +; GFX6-NEXT: v_lshlrev_b32_e32 v2, 5, v2 +; GFX6-NEXT: v_ldexp_f32_e64 v0, -v0, v2 ; GFX6-NEXT: v_log_f32_e32 v0, v0 ; GFX6-NEXT: v_mov_b32_e32 v2, 0x42000000 ; GFX6-NEXT: v_cndmask_b32_e32 v2, 0, v2, vcc @@ -1595,19 +1625,19 @@ define float @v_pow_f32_fneg_lhs(float %x, float %y) { ; GFX6-NEXT: v_cndmask_b32_e32 v1, 0, v2, vcc ; GFX6-NEXT: v_add_f32_e32 v0, v0, v1 ; GFX6-NEXT: v_exp_f32_e32 v0, v0 -; GFX6-NEXT: v_mov_b32_e32 v1, 0x1f800000 -; GFX6-NEXT: v_cndmask_b32_e32 v1, 1.0, v1, vcc -; GFX6-NEXT: v_mul_f32_e32 v0, v0, v1 +; GFX6-NEXT: v_not_b32_e32 v1, 63 +; GFX6-NEXT: v_cndmask_b32_e32 v1, 0, v1, vcc +; GFX6-NEXT: v_ldexp_f32_e32 v0, v0, v1 ; GFX6-NEXT: s_setpc_b64 s[30:31] ; ; GFX8-LABEL: v_pow_f32_fneg_lhs: ; GFX8: ; %bb.0: ; GFX8-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) ; GFX8-NEXT: v_mov_b32_e32 v2, 0x800000 -; GFX8-NEXT: v_mov_b32_e32 v3, 0x4f800000 ; GFX8-NEXT: v_cmp_lt_f32_e64 vcc, -v0, v2 -; GFX8-NEXT: v_cndmask_b32_e32 v2, 1.0, v3, vcc -; GFX8-NEXT: v_mul_f32_e64 v0, -v0, v2 +; GFX8-NEXT: v_cndmask_b32_e64 v2, 0, 1, vcc +; GFX8-NEXT: v_lshlrev_b32_e32 v2, 5, v2 +; GFX8-NEXT: v_ldexp_f32 v0, -v0, v2 ; GFX8-NEXT: v_log_f32_e32 v0, v0 ; GFX8-NEXT: v_mov_b32_e32 v2, 0x42000000 ; GFX8-NEXT: v_cndmask_b32_e32 v2, 0, v2, vcc @@ -1619,19 +1649,19 @@ define float @v_pow_f32_fneg_lhs(float %x, float %y) { ; GFX8-NEXT: v_cndmask_b32_e32 v1, 0, v2, vcc ; GFX8-NEXT: v_add_f32_e32 v0, v0, v1 ; GFX8-NEXT: v_exp_f32_e32 v0, v0 -; GFX8-NEXT: v_mov_b32_e32 v1, 0x1f800000 -; GFX8-NEXT: v_cndmask_b32_e32 v1, 1.0, v1, vcc -; GFX8-NEXT: v_mul_f32_e32 v0, v0, v1 +; GFX8-NEXT: v_not_b32_e32 v1, 63 +; GFX8-NEXT: v_cndmask_b32_e32 v1, 0, v1, vcc +; GFX8-NEXT: v_ldexp_f32 v0, v0, v1 ; GFX8-NEXT: s_setpc_b64 s[30:31] 
; ; GFX9-LABEL: v_pow_f32_fneg_lhs: ; GFX9: ; %bb.0: ; GFX9-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) ; GFX9-NEXT: v_mov_b32_e32 v2, 0x800000 -; GFX9-NEXT: v_mov_b32_e32 v3, 0x4f800000 ; GFX9-NEXT: v_cmp_lt_f32_e64 vcc, -v0, v2 -; GFX9-NEXT: v_cndmask_b32_e32 v2, 1.0, v3, vcc -; GFX9-NEXT: v_mul_f32_e64 v0, -v0, v2 +; GFX9-NEXT: v_cndmask_b32_e64 v2, 0, 1, vcc +; GFX9-NEXT: v_lshlrev_b32_e32 v2, 5, v2 +; GFX9-NEXT: v_ldexp_f32 v0, -v0, v2 ; GFX9-NEXT: v_log_f32_e32 v0, v0 ; GFX9-NEXT: v_mov_b32_e32 v2, 0x42000000 ; GFX9-NEXT: v_cndmask_b32_e32 v2, 0, v2, vcc @@ -1643,17 +1673,18 @@ define float @v_pow_f32_fneg_lhs(float %x, float %y) { ; GFX9-NEXT: v_cndmask_b32_e32 v1, 0, v2, vcc ; GFX9-NEXT: v_add_f32_e32 v0, v0, v1 ; GFX9-NEXT: v_exp_f32_e32 v0, v0 -; GFX9-NEXT: v_mov_b32_e32 v1, 0x1f800000 -; GFX9-NEXT: v_cndmask_b32_e32 v1, 1.0, v1, vcc -; GFX9-NEXT: v_mul_f32_e32 v0, v0, v1 +; GFX9-NEXT: v_not_b32_e32 v1, 63 +; GFX9-NEXT: v_cndmask_b32_e32 v1, 0, v1, vcc +; GFX9-NEXT: v_ldexp_f32 v0, v0, v1 ; GFX9-NEXT: s_setpc_b64 s[30:31] ; ; GFX10-LABEL: v_pow_f32_fneg_lhs: ; GFX10: ; %bb.0: ; GFX10-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) ; GFX10-NEXT: v_cmp_gt_f32_e64 s4, 0x800000, -v0 -; GFX10-NEXT: v_cndmask_b32_e64 v2, 1.0, 0x4f800000, s4 -; GFX10-NEXT: v_mul_f32_e64 v0, -v0, v2 +; GFX10-NEXT: v_cndmask_b32_e64 v2, 0, 1, s4 +; GFX10-NEXT: v_lshlrev_b32_e32 v2, 5, v2 +; GFX10-NEXT: v_ldexp_f32 v0, -v0, v2 ; GFX10-NEXT: v_cndmask_b32_e64 v2, 0, 0x42000000, s4 ; GFX10-NEXT: v_log_f32_e32 v0, v0 ; GFX10-NEXT: v_sub_f32_e32 v0, v0, v2 @@ -1661,9 +1692,9 @@ define float @v_pow_f32_fneg_lhs(float %x, float %y) { ; GFX10-NEXT: v_cmp_gt_f32_e32 vcc_lo, 0xc2fc0000, v0 ; GFX10-NEXT: v_cndmask_b32_e64 v1, 0, 0x42800000, vcc_lo ; GFX10-NEXT: v_add_f32_e32 v0, v0, v1 -; GFX10-NEXT: v_cndmask_b32_e64 v1, 1.0, 0x1f800000, vcc_lo +; GFX10-NEXT: v_cndmask_b32_e64 v1, 0, 0xffffffc0, vcc_lo ; GFX10-NEXT: v_exp_f32_e32 v0, v0 -; GFX10-NEXT: v_mul_f32_e32 v0, v0, v1 +; GFX10-NEXT: 
v_ldexp_f32 v0, v0, v1 ; GFX10-NEXT: s_setpc_b64 s[30:31] ; ; GFX11-LABEL: v_pow_f32_fneg_lhs: @@ -1671,23 +1702,24 @@ define float @v_pow_f32_fneg_lhs(float %x, float %y) { ; GFX11-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) ; GFX11-NEXT: v_cmp_gt_f32_e64 s0, 0x800000, -v0 ; GFX11-NEXT: s_delay_alu instid0(VALU_DEP_1) | instskip(NEXT) | instid1(VALU_DEP_1) -; GFX11-NEXT: v_cndmask_b32_e64 v2, 1.0, 0x4f800000, s0 -; GFX11-NEXT: v_mul_f32_e64 v0, -v0, v2 +; GFX11-NEXT: v_cndmask_b32_e64 v2, 0, 1, s0 +; GFX11-NEXT: v_lshlrev_b32_e32 v2, 5, v2 +; GFX11-NEXT: s_delay_alu instid0(VALU_DEP_1) | instskip(SKIP_1) | instid1(VALU_DEP_2) +; GFX11-NEXT: v_ldexp_f32 v0, -v0, v2 ; GFX11-NEXT: v_cndmask_b32_e64 v2, 0, 0x42000000, s0 -; GFX11-NEXT: s_delay_alu instid0(VALU_DEP_2) | instskip(SKIP_2) | instid1(VALU_DEP_1) ; GFX11-NEXT: v_log_f32_e32 v0, v0 ; GFX11-NEXT: s_waitcnt_depctr 0xfff ; GFX11-NEXT: v_sub_f32_e32 v0, v0, v2 +; GFX11-NEXT: s_delay_alu instid0(VALU_DEP_1) | instskip(NEXT) | instid1(VALU_DEP_1) ; GFX11-NEXT: v_mul_dx9_zero_f32_e32 v0, v0, v1 -; GFX11-NEXT: s_delay_alu instid0(VALU_DEP_1) | instskip(SKIP_1) | instid1(VALU_DEP_1) ; GFX11-NEXT: v_cmp_gt_f32_e32 vcc_lo, 0xc2fc0000, v0 ; GFX11-NEXT: v_cndmask_b32_e64 v1, 0, 0x42800000, vcc_lo +; GFX11-NEXT: s_delay_alu instid0(VALU_DEP_1) | instskip(SKIP_1) | instid1(VALU_DEP_2) ; GFX11-NEXT: v_add_f32_e32 v0, v0, v1 -; GFX11-NEXT: v_cndmask_b32_e64 v1, 1.0, 0x1f800000, vcc_lo -; GFX11-NEXT: s_delay_alu instid0(VALU_DEP_2) +; GFX11-NEXT: v_cndmask_b32_e64 v1, 0, 0xffffffc0, vcc_lo ; GFX11-NEXT: v_exp_f32_e32 v0, v0 ; GFX11-NEXT: s_waitcnt_depctr 0xfff -; GFX11-NEXT: v_mul_f32_e32 v0, v0, v1 +; GFX11-NEXT: v_ldexp_f32 v0, v0, v1 ; GFX11-NEXT: s_setpc_b64 s[30:31] %neg.x = fneg float %x %pow = call float @llvm.pow.f32(float %neg.x, float %y) @@ -1699,10 +1731,10 @@ define float @v_pow_f32_fneg_rhs(float %x, float %y) { ; GFX6: ; %bb.0: ; GFX6-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) ; GFX6-NEXT: v_mov_b32_e32 
v2, 0x800000 -; GFX6-NEXT: v_mov_b32_e32 v3, 0x4f800000 ; GFX6-NEXT: v_cmp_lt_f32_e32 vcc, v0, v2 -; GFX6-NEXT: v_cndmask_b32_e32 v2, 1.0, v3, vcc -; GFX6-NEXT: v_mul_f32_e32 v0, v0, v2 +; GFX6-NEXT: v_cndmask_b32_e64 v2, 0, 1, vcc +; GFX6-NEXT: v_lshlrev_b32_e32 v2, 5, v2 +; GFX6-NEXT: v_ldexp_f32_e32 v0, v0, v2 ; GFX6-NEXT: v_log_f32_e32 v0, v0 ; GFX6-NEXT: v_mov_b32_e32 v2, 0x42000000 ; GFX6-NEXT: v_cndmask_b32_e32 v2, 0, v2, vcc @@ -1714,19 +1746,19 @@ define float @v_pow_f32_fneg_rhs(float %x, float %y) { ; GFX6-NEXT: v_cndmask_b32_e32 v1, 0, v2, vcc ; GFX6-NEXT: v_add_f32_e32 v0, v0, v1 ; GFX6-NEXT: v_exp_f32_e32 v0, v0 -; GFX6-NEXT: v_mov_b32_e32 v1, 0x1f800000 -; GFX6-NEXT: v_cndmask_b32_e32 v1, 1.0, v1, vcc -; GFX6-NEXT: v_mul_f32_e32 v0, v0, v1 +; GFX6-NEXT: v_not_b32_e32 v1, 63 +; GFX6-NEXT: v_cndmask_b32_e32 v1, 0, v1, vcc +; GFX6-NEXT: v_ldexp_f32_e32 v0, v0, v1 ; GFX6-NEXT: s_setpc_b64 s[30:31] ; ; GFX8-LABEL: v_pow_f32_fneg_rhs: ; GFX8: ; %bb.0: ; GFX8-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) ; GFX8-NEXT: v_mov_b32_e32 v2, 0x800000 -; GFX8-NEXT: v_mov_b32_e32 v3, 0x4f800000 ; GFX8-NEXT: v_cmp_lt_f32_e32 vcc, v0, v2 -; GFX8-NEXT: v_cndmask_b32_e32 v2, 1.0, v3, vcc -; GFX8-NEXT: v_mul_f32_e32 v0, v0, v2 +; GFX8-NEXT: v_cndmask_b32_e64 v2, 0, 1, vcc +; GFX8-NEXT: v_lshlrev_b32_e32 v2, 5, v2 +; GFX8-NEXT: v_ldexp_f32 v0, v0, v2 ; GFX8-NEXT: v_log_f32_e32 v0, v0 ; GFX8-NEXT: v_mov_b32_e32 v2, 0x42000000 ; GFX8-NEXT: v_cndmask_b32_e32 v2, 0, v2, vcc @@ -1738,19 +1770,19 @@ define float @v_pow_f32_fneg_rhs(float %x, float %y) { ; GFX8-NEXT: v_cndmask_b32_e32 v1, 0, v2, vcc ; GFX8-NEXT: v_add_f32_e32 v0, v0, v1 ; GFX8-NEXT: v_exp_f32_e32 v0, v0 -; GFX8-NEXT: v_mov_b32_e32 v1, 0x1f800000 -; GFX8-NEXT: v_cndmask_b32_e32 v1, 1.0, v1, vcc -; GFX8-NEXT: v_mul_f32_e32 v0, v0, v1 +; GFX8-NEXT: v_not_b32_e32 v1, 63 +; GFX8-NEXT: v_cndmask_b32_e32 v1, 0, v1, vcc +; GFX8-NEXT: v_ldexp_f32 v0, v0, v1 ; GFX8-NEXT: s_setpc_b64 s[30:31] ; ; GFX9-LABEL: 
v_pow_f32_fneg_rhs: ; GFX9: ; %bb.0: ; GFX9-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) ; GFX9-NEXT: v_mov_b32_e32 v2, 0x800000 -; GFX9-NEXT: v_mov_b32_e32 v3, 0x4f800000 ; GFX9-NEXT: v_cmp_lt_f32_e32 vcc, v0, v2 -; GFX9-NEXT: v_cndmask_b32_e32 v2, 1.0, v3, vcc -; GFX9-NEXT: v_mul_f32_e32 v0, v0, v2 +; GFX9-NEXT: v_cndmask_b32_e64 v2, 0, 1, vcc +; GFX9-NEXT: v_lshlrev_b32_e32 v2, 5, v2 +; GFX9-NEXT: v_ldexp_f32 v0, v0, v2 ; GFX9-NEXT: v_log_f32_e32 v0, v0 ; GFX9-NEXT: v_mov_b32_e32 v2, 0x42000000 ; GFX9-NEXT: v_cndmask_b32_e32 v2, 0, v2, vcc @@ -1762,17 +1794,18 @@ define float @v_pow_f32_fneg_rhs(float %x, float %y) { ; GFX9-NEXT: v_cndmask_b32_e32 v1, 0, v2, vcc ; GFX9-NEXT: v_add_f32_e32 v0, v0, v1 ; GFX9-NEXT: v_exp_f32_e32 v0, v0 -; GFX9-NEXT: v_mov_b32_e32 v1, 0x1f800000 -; GFX9-NEXT: v_cndmask_b32_e32 v1, 1.0, v1, vcc -; GFX9-NEXT: v_mul_f32_e32 v0, v0, v1 +; GFX9-NEXT: v_not_b32_e32 v1, 63 +; GFX9-NEXT: v_cndmask_b32_e32 v1, 0, v1, vcc +; GFX9-NEXT: v_ldexp_f32 v0, v0, v1 ; GFX9-NEXT: s_setpc_b64 s[30:31] ; ; GFX10-LABEL: v_pow_f32_fneg_rhs: ; GFX10: ; %bb.0: ; GFX10-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) ; GFX10-NEXT: v_cmp_gt_f32_e32 vcc_lo, 0x800000, v0 -; GFX10-NEXT: v_cndmask_b32_e64 v2, 1.0, 0x4f800000, vcc_lo -; GFX10-NEXT: v_mul_f32_e32 v0, v0, v2 +; GFX10-NEXT: v_cndmask_b32_e64 v2, 0, 1, vcc_lo +; GFX10-NEXT: v_lshlrev_b32_e32 v2, 5, v2 +; GFX10-NEXT: v_ldexp_f32 v0, v0, v2 ; GFX10-NEXT: v_cndmask_b32_e64 v2, 0, 0x42000000, vcc_lo ; GFX10-NEXT: v_log_f32_e32 v0, v0 ; GFX10-NEXT: v_sub_f32_e32 v0, v0, v2 @@ -1780,32 +1813,34 @@ define float @v_pow_f32_fneg_rhs(float %x, float %y) { ; GFX10-NEXT: v_cmp_gt_f32_e32 vcc_lo, 0xc2fc0000, v0 ; GFX10-NEXT: v_cndmask_b32_e64 v1, 0, 0x42800000, vcc_lo ; GFX10-NEXT: v_add_f32_e32 v0, v0, v1 -; GFX10-NEXT: v_cndmask_b32_e64 v1, 1.0, 0x1f800000, vcc_lo +; GFX10-NEXT: v_cndmask_b32_e64 v1, 0, 0xffffffc0, vcc_lo ; GFX10-NEXT: v_exp_f32_e32 v0, v0 -; GFX10-NEXT: v_mul_f32_e32 v0, v0, v1 +; GFX10-NEXT: 
v_ldexp_f32 v0, v0, v1 ; GFX10-NEXT: s_setpc_b64 s[30:31] ; ; GFX11-LABEL: v_pow_f32_fneg_rhs: ; GFX11: ; %bb.0: ; GFX11-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) ; GFX11-NEXT: v_cmp_gt_f32_e32 vcc_lo, 0x800000, v0 -; GFX11-NEXT: v_cndmask_b32_e64 v2, 1.0, 0x4f800000, vcc_lo -; GFX11-NEXT: s_delay_alu instid0(VALU_DEP_1) | instskip(SKIP_1) | instid1(VALU_DEP_2) -; GFX11-NEXT: v_mul_f32_e32 v0, v0, v2 +; GFX11-NEXT: v_cndmask_b32_e64 v2, 0, 1, vcc_lo +; GFX11-NEXT: s_delay_alu instid0(VALU_DEP_1) | instskip(NEXT) | instid1(VALU_DEP_1) +; GFX11-NEXT: v_lshlrev_b32_e32 v2, 5, v2 +; GFX11-NEXT: v_ldexp_f32 v0, v0, v2 ; GFX11-NEXT: v_cndmask_b32_e64 v2, 0, 0x42000000, vcc_lo +; GFX11-NEXT: s_delay_alu instid0(VALU_DEP_2) | instskip(SKIP_2) | instid1(VALU_DEP_1) ; GFX11-NEXT: v_log_f32_e32 v0, v0 ; GFX11-NEXT: s_waitcnt_depctr 0xfff ; GFX11-NEXT: v_sub_f32_e32 v0, v0, v2 -; GFX11-NEXT: s_delay_alu instid0(VALU_DEP_1) | instskip(NEXT) | instid1(VALU_DEP_1) ; GFX11-NEXT: v_mul_dx9_zero_f32_e64 v0, v0, -v1 +; GFX11-NEXT: s_delay_alu instid0(VALU_DEP_1) | instskip(SKIP_1) | instid1(VALU_DEP_1) ; GFX11-NEXT: v_cmp_gt_f32_e32 vcc_lo, 0xc2fc0000, v0 ; GFX11-NEXT: v_cndmask_b32_e64 v1, 0, 0x42800000, vcc_lo -; GFX11-NEXT: s_delay_alu instid0(VALU_DEP_1) | instskip(SKIP_1) | instid1(VALU_DEP_2) ; GFX11-NEXT: v_add_f32_e32 v0, v0, v1 -; GFX11-NEXT: v_cndmask_b32_e64 v1, 1.0, 0x1f800000, vcc_lo +; GFX11-NEXT: v_cndmask_b32_e64 v1, 0, 0xffffffc0, vcc_lo +; GFX11-NEXT: s_delay_alu instid0(VALU_DEP_2) ; GFX11-NEXT: v_exp_f32_e32 v0, v0 ; GFX11-NEXT: s_waitcnt_depctr 0xfff -; GFX11-NEXT: v_mul_f32_e32 v0, v0, v1 +; GFX11-NEXT: v_ldexp_f32 v0, v0, v1 ; GFX11-NEXT: s_setpc_b64 s[30:31] %neg.y = fneg float %y %pow = call float @llvm.pow.f32(float %x, float %neg.y) diff --git a/llvm/test/CodeGen/AMDGPU/GlobalISel/llvm.powi.ll b/llvm/test/CodeGen/AMDGPU/GlobalISel/llvm.powi.ll index eeb7b138fde31..fe002d69faf66 100644 --- a/llvm/test/CodeGen/AMDGPU/GlobalISel/llvm.powi.ll +++ 
b/llvm/test/CodeGen/AMDGPU/GlobalISel/llvm.powi.ll @@ -18,9 +18,9 @@ define i16 @v_powi_f16(i16 %l, i32 %r) { ; GFX7-NEXT: v_cndmask_b32_e32 v1, 0, v3, vcc ; GFX7-NEXT: v_add_f32_e32 v0, v0, v1 ; GFX7-NEXT: v_exp_f32_e32 v0, v0 -; GFX7-NEXT: v_mov_b32_e32 v1, 0x1f800000 -; GFX7-NEXT: v_cndmask_b32_e32 v1, 1.0, v1, vcc -; GFX7-NEXT: v_mul_f32_e32 v0, v0, v1 +; GFX7-NEXT: v_not_b32_e32 v1, 63 +; GFX7-NEXT: v_cndmask_b32_e32 v1, 0, v1, vcc +; GFX7-NEXT: v_ldexp_f32_e32 v0, v0, v1 ; GFX7-NEXT: v_cvt_f16_f32_e32 v0, v0 ; GFX7-NEXT: s_setpc_b64 s[30:31] ; @@ -75,53 +75,80 @@ define i16 @v_powi_f16(i16 %l, i32 %r) { } define float @v_powi_f32(float %l, i32 %r) { -; GFX78-LABEL: v_powi_f32: -; GFX78: ; %bb.0: -; GFX78-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX78-NEXT: v_mov_b32_e32 v2, 0x800000 -; GFX78-NEXT: v_mov_b32_e32 v3, 0x4f800000 -; GFX78-NEXT: v_cmp_lt_f32_e32 vcc, v0, v2 -; GFX78-NEXT: v_cndmask_b32_e32 v2, 1.0, v3, vcc -; GFX78-NEXT: v_mul_f32_e32 v0, v0, v2 -; GFX78-NEXT: v_log_f32_e32 v0, v0 -; GFX78-NEXT: v_cvt_f32_i32_e32 v1, v1 -; GFX78-NEXT: v_mov_b32_e32 v2, 0x42000000 -; GFX78-NEXT: v_cndmask_b32_e32 v2, 0, v2, vcc -; GFX78-NEXT: v_sub_f32_e32 v0, v0, v2 -; GFX78-NEXT: v_mul_legacy_f32_e32 v0, v0, v1 -; GFX78-NEXT: v_mov_b32_e32 v1, 0xc2fc0000 -; GFX78-NEXT: v_mov_b32_e32 v2, 0x42800000 -; GFX78-NEXT: v_cmp_lt_f32_e32 vcc, v0, v1 -; GFX78-NEXT: v_cndmask_b32_e32 v1, 0, v2, vcc -; GFX78-NEXT: v_add_f32_e32 v0, v0, v1 -; GFX78-NEXT: v_exp_f32_e32 v0, v0 -; GFX78-NEXT: v_mov_b32_e32 v1, 0x1f800000 -; GFX78-NEXT: v_cndmask_b32_e32 v1, 1.0, v1, vcc -; GFX78-NEXT: v_mul_f32_e32 v0, v0, v1 -; GFX78-NEXT: s_setpc_b64 s[30:31] +; GFX7-LABEL: v_powi_f32: +; GFX7: ; %bb.0: +; GFX7-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX7-NEXT: v_mov_b32_e32 v2, 0x800000 +; GFX7-NEXT: v_cmp_lt_f32_e32 vcc, v0, v2 +; GFX7-NEXT: v_cndmask_b32_e64 v2, 0, 1, vcc +; GFX7-NEXT: v_lshlrev_b32_e32 v2, 5, v2 +; GFX7-NEXT: v_ldexp_f32_e32 v0, v0, v2 +; GFX7-NEXT: 
v_log_f32_e32 v0, v0 +; GFX7-NEXT: v_cvt_f32_i32_e32 v1, v1 +; GFX7-NEXT: v_mov_b32_e32 v2, 0x42000000 +; GFX7-NEXT: v_cndmask_b32_e32 v2, 0, v2, vcc +; GFX7-NEXT: v_sub_f32_e32 v0, v0, v2 +; GFX7-NEXT: v_mul_legacy_f32_e32 v0, v0, v1 +; GFX7-NEXT: v_mov_b32_e32 v1, 0xc2fc0000 +; GFX7-NEXT: v_mov_b32_e32 v2, 0x42800000 +; GFX7-NEXT: v_cmp_lt_f32_e32 vcc, v0, v1 +; GFX7-NEXT: v_cndmask_b32_e32 v1, 0, v2, vcc +; GFX7-NEXT: v_add_f32_e32 v0, v0, v1 +; GFX7-NEXT: v_exp_f32_e32 v0, v0 +; GFX7-NEXT: v_not_b32_e32 v1, 63 +; GFX7-NEXT: v_cndmask_b32_e32 v1, 0, v1, vcc +; GFX7-NEXT: v_ldexp_f32_e32 v0, v0, v1 +; GFX7-NEXT: s_setpc_b64 s[30:31] +; +; GFX8-LABEL: v_powi_f32: +; GFX8: ; %bb.0: +; GFX8-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX8-NEXT: v_mov_b32_e32 v2, 0x800000 +; GFX8-NEXT: v_cmp_lt_f32_e32 vcc, v0, v2 +; GFX8-NEXT: v_cndmask_b32_e64 v2, 0, 1, vcc +; GFX8-NEXT: v_lshlrev_b32_e32 v2, 5, v2 +; GFX8-NEXT: v_ldexp_f32 v0, v0, v2 +; GFX8-NEXT: v_log_f32_e32 v0, v0 +; GFX8-NEXT: v_cvt_f32_i32_e32 v1, v1 +; GFX8-NEXT: v_mov_b32_e32 v2, 0x42000000 +; GFX8-NEXT: v_cndmask_b32_e32 v2, 0, v2, vcc +; GFX8-NEXT: v_sub_f32_e32 v0, v0, v2 +; GFX8-NEXT: v_mul_legacy_f32_e32 v0, v0, v1 +; GFX8-NEXT: v_mov_b32_e32 v1, 0xc2fc0000 +; GFX8-NEXT: v_mov_b32_e32 v2, 0x42800000 +; GFX8-NEXT: v_cmp_lt_f32_e32 vcc, v0, v1 +; GFX8-NEXT: v_cndmask_b32_e32 v1, 0, v2, vcc +; GFX8-NEXT: v_add_f32_e32 v0, v0, v1 +; GFX8-NEXT: v_exp_f32_e32 v0, v0 +; GFX8-NEXT: v_not_b32_e32 v1, 63 +; GFX8-NEXT: v_cndmask_b32_e32 v1, 0, v1, vcc +; GFX8-NEXT: v_ldexp_f32 v0, v0, v1 +; GFX8-NEXT: s_setpc_b64 s[30:31] ; ; GFX11-LABEL: v_powi_f32: ; GFX11: ; %bb.0: ; GFX11-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) ; GFX11-NEXT: v_cmp_gt_f32_e32 vcc_lo, 0x800000, v0 ; GFX11-NEXT: v_cvt_f32_i32_e32 v1, v1 -; GFX11-NEXT: v_cndmask_b32_e64 v2, 1.0, 0x4f800000, vcc_lo -; GFX11-NEXT: s_delay_alu instid0(VALU_DEP_1) | instskip(SKIP_1) | instid1(VALU_DEP_2) -; GFX11-NEXT: v_mul_f32_e32 v0, v0, v2 +; 
GFX11-NEXT: v_cndmask_b32_e64 v2, 0, 1, vcc_lo +; GFX11-NEXT: s_delay_alu instid0(VALU_DEP_1) | instskip(NEXT) | instid1(VALU_DEP_1) +; GFX11-NEXT: v_lshlrev_b32_e32 v2, 5, v2 +; GFX11-NEXT: v_ldexp_f32 v0, v0, v2 ; GFX11-NEXT: v_cndmask_b32_e64 v2, 0, 0x42000000, vcc_lo +; GFX11-NEXT: s_delay_alu instid0(VALU_DEP_2) | instskip(SKIP_2) | instid1(VALU_DEP_1) ; GFX11-NEXT: v_log_f32_e32 v0, v0 ; GFX11-NEXT: s_waitcnt_depctr 0xfff ; GFX11-NEXT: v_sub_f32_e32 v0, v0, v2 -; GFX11-NEXT: s_delay_alu instid0(VALU_DEP_1) | instskip(NEXT) | instid1(VALU_DEP_1) ; GFX11-NEXT: v_mul_dx9_zero_f32_e32 v0, v0, v1 +; GFX11-NEXT: s_delay_alu instid0(VALU_DEP_1) | instskip(SKIP_1) | instid1(VALU_DEP_1) ; GFX11-NEXT: v_cmp_gt_f32_e32 vcc_lo, 0xc2fc0000, v0 ; GFX11-NEXT: v_cndmask_b32_e64 v1, 0, 0x42800000, vcc_lo -; GFX11-NEXT: s_delay_alu instid0(VALU_DEP_1) | instskip(SKIP_1) | instid1(VALU_DEP_2) ; GFX11-NEXT: v_add_f32_e32 v0, v0, v1 -; GFX11-NEXT: v_cndmask_b32_e64 v1, 1.0, 0x1f800000, vcc_lo +; GFX11-NEXT: v_cndmask_b32_e64 v1, 0, 0xffffffc0, vcc_lo +; GFX11-NEXT: s_delay_alu instid0(VALU_DEP_2) ; GFX11-NEXT: v_exp_f32_e32 v0, v0 ; GFX11-NEXT: s_waitcnt_depctr 0xfff -; GFX11-NEXT: v_mul_f32_e32 v0, v0, v1 +; GFX11-NEXT: v_ldexp_f32 v0, v0, v1 ; GFX11-NEXT: s_setpc_b64 s[30:31] %res = call float @llvm.powi.f32.i32(float %l, i32 %r) ret float %res diff --git a/llvm/test/CodeGen/AMDGPU/dagcombine-fmul-sel.ll b/llvm/test/CodeGen/AMDGPU/dagcombine-fmul-sel.ll index 5b72795ba07ea..b128be2186df2 100644 --- a/llvm/test/CodeGen/AMDGPU/dagcombine-fmul-sel.ll +++ b/llvm/test/CodeGen/AMDGPU/dagcombine-fmul-sel.ll @@ -1,8 +1,12 @@ ; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py UTC_ARGS: --version 5 -;RUN: llc -mtriple=amdgcn-amd-amdhsa -mcpu=gfx700 < %s | FileCheck -check-prefix=GFX7 %s -;RUN: llc -mtriple=amdgcn-amd-amdhsa -mcpu=gfx900 < %s | FileCheck -check-prefix=GFX9 %s -;RUN: llc -mtriple=amdgcn-amd-amdhsa -mcpu=gfx1030 < %s | FileCheck 
-check-prefix=GFX1030 %s -;RUN: llc -mtriple=amdgcn-amd-amdhsa -mcpu=gfx1100 < %s | FileCheck -check-prefix=GFX1100 %s +; RUN: llc -global-isel=0 -mtriple=amdgcn-amd-amdhsa -mcpu=gfx700 < %s | FileCheck -check-prefixes=GFX7,GFX7-SDAG %s +; RUN: llc -global-isel=1 -mtriple=amdgcn-amd-amdhsa -mcpu=gfx700 < %s | FileCheck -check-prefixes=GFX7,GFX7-GISEL %s +; RUN: llc -global-isel=0 -mtriple=amdgcn-amd-amdhsa -mcpu=gfx900 < %s | FileCheck -check-prefixes=GFX9,GFX9-SDAG %s +; RUN: llc -global-isel=1 -mtriple=amdgcn-amd-amdhsa -mcpu=gfx900 < %s | FileCheck -check-prefixes=GFX9,GFX9-GISEL %s +; RUN: llc -global-isel=0 -mtriple=amdgcn-amd-amdhsa -mcpu=gfx1030 < %s | FileCheck -check-prefixes=GFX10,GFX10-SDAG %s +; RUN: llc -global-isel=1 -mtriple=amdgcn-amd-amdhsa -mcpu=gfx1030 < %s | FileCheck -check-prefixes=GFX10,GFX10-GISEL %s +; RUN: llc -global-isel=0 -mtriple=amdgcn-amd-amdhsa -mcpu=gfx1100 < %s | FileCheck -check-prefixes=GFX11,GFX11-SDAG %s +; RUN: llc -global-isel=1 -mtriple=amdgcn-amd-amdhsa -mcpu=gfx1100 < %s | FileCheck -check-prefixes=GFX11,GFX11-GISEL %s define float @fmul_select_f32_test1(float %x, i32 %bool.arg1, i32 %bool.arg2) { ; GFX7-LABEL: fmul_select_f32_test1: @@ -21,22 +25,22 @@ define float @fmul_select_f32_test1(float %x, i32 %bool.arg1, i32 %bool.arg2) { ; GFX9-NEXT: v_mul_f32_e32 v0, v0, v1 ; GFX9-NEXT: s_setpc_b64 s[30:31] ; -; GFX1030-LABEL: fmul_select_f32_test1: -; GFX1030: ; %bb.0: -; GFX1030-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX1030-NEXT: v_cmp_eq_u32_e32 vcc_lo, v1, v2 -; GFX1030-NEXT: v_cndmask_b32_e64 v1, 1.0, 2.0, vcc_lo -; GFX1030-NEXT: v_mul_f32_e32 v0, v0, v1 -; GFX1030-NEXT: s_setpc_b64 s[30:31] -; -; GFX1100-LABEL: fmul_select_f32_test1: -; GFX1100: ; %bb.0: -; GFX1100-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX1100-NEXT: v_cmp_eq_u32_e32 vcc_lo, v1, v2 -; GFX1100-NEXT: v_cndmask_b32_e64 v1, 1.0, 2.0, vcc_lo -; GFX1100-NEXT: s_delay_alu instid0(VALU_DEP_1) -; GFX1100-NEXT: v_mul_f32_e32 v0, v0, v1 -; 
GFX1100-NEXT: s_setpc_b64 s[30:31] +; GFX10-LABEL: fmul_select_f32_test1: +; GFX10: ; %bb.0: +; GFX10-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX10-NEXT: v_cmp_eq_u32_e32 vcc_lo, v1, v2 +; GFX10-NEXT: v_cndmask_b32_e64 v1, 1.0, 2.0, vcc_lo +; GFX10-NEXT: v_mul_f32_e32 v0, v0, v1 +; GFX10-NEXT: s_setpc_b64 s[30:31] +; +; GFX11-LABEL: fmul_select_f32_test1: +; GFX11: ; %bb.0: +; GFX11-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX11-NEXT: v_cmp_eq_u32_e32 vcc_lo, v1, v2 +; GFX11-NEXT: v_cndmask_b32_e64 v1, 1.0, 2.0, vcc_lo +; GFX11-NEXT: s_delay_alu instid0(VALU_DEP_1) +; GFX11-NEXT: v_mul_f32_e32 v0, v0, v1 +; GFX11-NEXT: s_setpc_b64 s[30:31] %bool = icmp eq i32 %bool.arg1, %bool.arg2 %y = select i1 %bool, float 2.000000e+00, float 1.000000e+00 %ldexp = fmul float %x, %y @@ -60,22 +64,22 @@ define float @fmul_select_f32_test2(float %x, i32 %bool.arg1, i32 %bool.arg2) { ; GFX9-NEXT: v_mul_f32_e32 v0, v0, v1 ; GFX9-NEXT: s_setpc_b64 s[30:31] ; -; GFX1030-LABEL: fmul_select_f32_test2: -; GFX1030: ; %bb.0: -; GFX1030-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX1030-NEXT: v_cmp_eq_u32_e32 vcc_lo, v1, v2 -; GFX1030-NEXT: v_cndmask_b32_e64 v1, 1.0, 0.5, vcc_lo -; GFX1030-NEXT: v_mul_f32_e32 v0, v0, v1 -; GFX1030-NEXT: s_setpc_b64 s[30:31] -; -; GFX1100-LABEL: fmul_select_f32_test2: -; GFX1100: ; %bb.0: -; GFX1100-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX1100-NEXT: v_cmp_eq_u32_e32 vcc_lo, v1, v2 -; GFX1100-NEXT: v_cndmask_b32_e64 v1, 1.0, 0.5, vcc_lo -; GFX1100-NEXT: s_delay_alu instid0(VALU_DEP_1) -; GFX1100-NEXT: v_mul_f32_e32 v0, v0, v1 -; GFX1100-NEXT: s_setpc_b64 s[30:31] +; GFX10-LABEL: fmul_select_f32_test2: +; GFX10: ; %bb.0: +; GFX10-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX10-NEXT: v_cmp_eq_u32_e32 vcc_lo, v1, v2 +; GFX10-NEXT: v_cndmask_b32_e64 v1, 1.0, 0.5, vcc_lo +; GFX10-NEXT: v_mul_f32_e32 v0, v0, v1 +; GFX10-NEXT: s_setpc_b64 s[30:31] +; +; GFX11-LABEL: fmul_select_f32_test2: +; GFX11: ; %bb.0: +; GFX11-NEXT: 
s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX11-NEXT: v_cmp_eq_u32_e32 vcc_lo, v1, v2 +; GFX11-NEXT: v_cndmask_b32_e64 v1, 1.0, 0.5, vcc_lo +; GFX11-NEXT: s_delay_alu instid0(VALU_DEP_1) +; GFX11-NEXT: v_mul_f32_e32 v0, v0, v1 +; GFX11-NEXT: s_setpc_b64 s[30:31] %bool = icmp eq i32 %bool.arg1, %bool.arg2 %y = select i1 %bool, float 5.000000e-01, float 1.000000e+00 %ldexp = fmul float %x, %y @@ -83,49 +87,71 @@ define float @fmul_select_f32_test2(float %x, i32 %bool.arg1, i32 %bool.arg2) { } define <2 x float> @fmul_select_v2f32_test3(<2 x float> %x, <2 x i32> %bool.arg1, <2 x i32> %bool.arg2) { -; GFX7-LABEL: fmul_select_v2f32_test3: -; GFX7: ; %bb.0: -; GFX7-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX7-NEXT: v_cmp_eq_u32_e32 vcc, v3, v5 -; GFX7-NEXT: v_cndmask_b32_e64 v3, 1.0, 2.0, vcc -; GFX7-NEXT: v_cmp_eq_u32_e32 vcc, v2, v4 -; GFX7-NEXT: v_cndmask_b32_e64 v2, 1.0, 2.0, vcc -; GFX7-NEXT: v_mul_f32_e32 v0, v0, v2 -; GFX7-NEXT: v_mul_f32_e32 v1, v1, v3 -; GFX7-NEXT: s_setpc_b64 s[30:31] -; -; GFX9-LABEL: fmul_select_v2f32_test3: -; GFX9: ; %bb.0: -; GFX9-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX9-NEXT: v_cmp_eq_u32_e32 vcc, v3, v5 -; GFX9-NEXT: v_cndmask_b32_e64 v3, 1.0, 2.0, vcc -; GFX9-NEXT: v_cmp_eq_u32_e32 vcc, v2, v4 -; GFX9-NEXT: v_cndmask_b32_e64 v2, 1.0, 2.0, vcc -; GFX9-NEXT: v_mul_f32_e32 v0, v0, v2 -; GFX9-NEXT: v_mul_f32_e32 v1, v1, v3 -; GFX9-NEXT: s_setpc_b64 s[30:31] -; -; GFX1030-LABEL: fmul_select_v2f32_test3: -; GFX1030: ; %bb.0: -; GFX1030-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX1030-NEXT: v_cmp_eq_u32_e32 vcc_lo, v2, v4 -; GFX1030-NEXT: v_cndmask_b32_e64 v2, 1.0, 2.0, vcc_lo -; GFX1030-NEXT: v_cmp_eq_u32_e32 vcc_lo, v3, v5 -; GFX1030-NEXT: v_mul_f32_e32 v0, v0, v2 -; GFX1030-NEXT: v_cndmask_b32_e64 v3, 1.0, 2.0, vcc_lo -; GFX1030-NEXT: v_mul_f32_e32 v1, v1, v3 -; GFX1030-NEXT: s_setpc_b64 s[30:31] -; -; GFX1100-LABEL: fmul_select_v2f32_test3: -; GFX1100: ; %bb.0: -; GFX1100-NEXT: s_waitcnt vmcnt(0) expcnt(0) 
lgkmcnt(0) -; GFX1100-NEXT: v_cmp_eq_u32_e32 vcc_lo, v2, v4 -; GFX1100-NEXT: v_cndmask_b32_e64 v2, 1.0, 2.0, vcc_lo -; GFX1100-NEXT: v_cmp_eq_u32_e32 vcc_lo, v3, v5 -; GFX1100-NEXT: v_cndmask_b32_e64 v3, 1.0, 2.0, vcc_lo -; GFX1100-NEXT: s_delay_alu instid0(VALU_DEP_1) -; GFX1100-NEXT: v_dual_mul_f32 v0, v0, v2 :: v_dual_mul_f32 v1, v1, v3 -; GFX1100-NEXT: s_setpc_b64 s[30:31] +; GFX7-SDAG-LABEL: fmul_select_v2f32_test3: +; GFX7-SDAG: ; %bb.0: +; GFX7-SDAG-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX7-SDAG-NEXT: v_cmp_eq_u32_e32 vcc, v3, v5 +; GFX7-SDAG-NEXT: v_cndmask_b32_e64 v3, 1.0, 2.0, vcc +; GFX7-SDAG-NEXT: v_cmp_eq_u32_e32 vcc, v2, v4 +; GFX7-SDAG-NEXT: v_cndmask_b32_e64 v2, 1.0, 2.0, vcc +; GFX7-SDAG-NEXT: v_mul_f32_e32 v0, v0, v2 +; GFX7-SDAG-NEXT: v_mul_f32_e32 v1, v1, v3 +; GFX7-SDAG-NEXT: s_setpc_b64 s[30:31] +; +; GFX7-GISEL-LABEL: fmul_select_v2f32_test3: +; GFX7-GISEL: ; %bb.0: +; GFX7-GISEL-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX7-GISEL-NEXT: v_cmp_eq_u32_e32 vcc, v2, v4 +; GFX7-GISEL-NEXT: v_cndmask_b32_e64 v2, 1.0, 2.0, vcc +; GFX7-GISEL-NEXT: v_cmp_eq_u32_e32 vcc, v3, v5 +; GFX7-GISEL-NEXT: v_cndmask_b32_e64 v3, 1.0, 2.0, vcc +; GFX7-GISEL-NEXT: v_mul_f32_e32 v0, v0, v2 +; GFX7-GISEL-NEXT: v_mul_f32_e32 v1, v1, v3 +; GFX7-GISEL-NEXT: s_setpc_b64 s[30:31] +; +; GFX9-SDAG-LABEL: fmul_select_v2f32_test3: +; GFX9-SDAG: ; %bb.0: +; GFX9-SDAG-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX9-SDAG-NEXT: v_cmp_eq_u32_e32 vcc, v3, v5 +; GFX9-SDAG-NEXT: v_cndmask_b32_e64 v3, 1.0, 2.0, vcc +; GFX9-SDAG-NEXT: v_cmp_eq_u32_e32 vcc, v2, v4 +; GFX9-SDAG-NEXT: v_cndmask_b32_e64 v2, 1.0, 2.0, vcc +; GFX9-SDAG-NEXT: v_mul_f32_e32 v0, v0, v2 +; GFX9-SDAG-NEXT: v_mul_f32_e32 v1, v1, v3 +; GFX9-SDAG-NEXT: s_setpc_b64 s[30:31] +; +; GFX9-GISEL-LABEL: fmul_select_v2f32_test3: +; GFX9-GISEL: ; %bb.0: +; GFX9-GISEL-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX9-GISEL-NEXT: v_cmp_eq_u32_e32 vcc, v2, v4 +; GFX9-GISEL-NEXT: v_cndmask_b32_e64 
v2, 1.0, 2.0, vcc +; GFX9-GISEL-NEXT: v_cmp_eq_u32_e32 vcc, v3, v5 +; GFX9-GISEL-NEXT: v_cndmask_b32_e64 v3, 1.0, 2.0, vcc +; GFX9-GISEL-NEXT: v_mul_f32_e32 v0, v0, v2 +; GFX9-GISEL-NEXT: v_mul_f32_e32 v1, v1, v3 +; GFX9-GISEL-NEXT: s_setpc_b64 s[30:31] +; +; GFX10-LABEL: fmul_select_v2f32_test3: +; GFX10: ; %bb.0: +; GFX10-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX10-NEXT: v_cmp_eq_u32_e32 vcc_lo, v2, v4 +; GFX10-NEXT: v_cndmask_b32_e64 v2, 1.0, 2.0, vcc_lo +; GFX10-NEXT: v_cmp_eq_u32_e32 vcc_lo, v3, v5 +; GFX10-NEXT: v_mul_f32_e32 v0, v0, v2 +; GFX10-NEXT: v_cndmask_b32_e64 v3, 1.0, 2.0, vcc_lo +; GFX10-NEXT: v_mul_f32_e32 v1, v1, v3 +; GFX10-NEXT: s_setpc_b64 s[30:31] +; +; GFX11-LABEL: fmul_select_v2f32_test3: +; GFX11: ; %bb.0: +; GFX11-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX11-NEXT: v_cmp_eq_u32_e32 vcc_lo, v2, v4 +; GFX11-NEXT: v_cndmask_b32_e64 v2, 1.0, 2.0, vcc_lo +; GFX11-NEXT: v_cmp_eq_u32_e32 vcc_lo, v3, v5 +; GFX11-NEXT: v_cndmask_b32_e64 v3, 1.0, 2.0, vcc_lo +; GFX11-NEXT: s_delay_alu instid0(VALU_DEP_1) +; GFX11-NEXT: v_dual_mul_f32 v0, v0, v2 :: v_dual_mul_f32 v1, v1, v3 +; GFX11-NEXT: s_setpc_b64 s[30:31] %bool = icmp eq <2 x i32> %bool.arg1, %bool.arg2 %y = select <2 x i1> %bool, <2 x float> , <2 x float> %ldexp = fmul <2 x float> %x, %y @@ -133,49 +159,71 @@ define <2 x float> @fmul_select_v2f32_test3(<2 x float> %x, <2 x i32> %bool.arg1 } define <2 x float> @fmul_select_v2f32_test4(<2 x float> %x, <2 x i32> %bool.arg1, <2 x i32> %bool.arg2) { -; GFX7-LABEL: fmul_select_v2f32_test4: -; GFX7: ; %bb.0: -; GFX7-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX7-NEXT: v_cmp_eq_u32_e32 vcc, v3, v5 -; GFX7-NEXT: v_cndmask_b32_e64 v3, 1.0, 0.5, vcc -; GFX7-NEXT: v_cmp_eq_u32_e32 vcc, v2, v4 -; GFX7-NEXT: v_cndmask_b32_e64 v2, 1.0, 0.5, vcc -; GFX7-NEXT: v_mul_f32_e32 v0, v0, v2 -; GFX7-NEXT: v_mul_f32_e32 v1, v1, v3 -; GFX7-NEXT: s_setpc_b64 s[30:31] -; -; GFX9-LABEL: fmul_select_v2f32_test4: -; GFX9: ; %bb.0: -; GFX9-NEXT: 
s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX9-NEXT: v_cmp_eq_u32_e32 vcc, v3, v5 -; GFX9-NEXT: v_cndmask_b32_e64 v3, 1.0, 0.5, vcc -; GFX9-NEXT: v_cmp_eq_u32_e32 vcc, v2, v4 -; GFX9-NEXT: v_cndmask_b32_e64 v2, 1.0, 0.5, vcc -; GFX9-NEXT: v_mul_f32_e32 v0, v0, v2 -; GFX9-NEXT: v_mul_f32_e32 v1, v1, v3 -; GFX9-NEXT: s_setpc_b64 s[30:31] -; -; GFX1030-LABEL: fmul_select_v2f32_test4: -; GFX1030: ; %bb.0: -; GFX1030-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX1030-NEXT: v_cmp_eq_u32_e32 vcc_lo, v2, v4 -; GFX1030-NEXT: v_cndmask_b32_e64 v2, 1.0, 0.5, vcc_lo -; GFX1030-NEXT: v_cmp_eq_u32_e32 vcc_lo, v3, v5 -; GFX1030-NEXT: v_mul_f32_e32 v0, v0, v2 -; GFX1030-NEXT: v_cndmask_b32_e64 v3, 1.0, 0.5, vcc_lo -; GFX1030-NEXT: v_mul_f32_e32 v1, v1, v3 -; GFX1030-NEXT: s_setpc_b64 s[30:31] -; -; GFX1100-LABEL: fmul_select_v2f32_test4: -; GFX1100: ; %bb.0: -; GFX1100-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX1100-NEXT: v_cmp_eq_u32_e32 vcc_lo, v2, v4 -; GFX1100-NEXT: v_cndmask_b32_e64 v2, 1.0, 0.5, vcc_lo -; GFX1100-NEXT: v_cmp_eq_u32_e32 vcc_lo, v3, v5 -; GFX1100-NEXT: v_cndmask_b32_e64 v3, 1.0, 0.5, vcc_lo -; GFX1100-NEXT: s_delay_alu instid0(VALU_DEP_1) -; GFX1100-NEXT: v_dual_mul_f32 v0, v0, v2 :: v_dual_mul_f32 v1, v1, v3 -; GFX1100-NEXT: s_setpc_b64 s[30:31] +; GFX7-SDAG-LABEL: fmul_select_v2f32_test4: +; GFX7-SDAG: ; %bb.0: +; GFX7-SDAG-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX7-SDAG-NEXT: v_cmp_eq_u32_e32 vcc, v3, v5 +; GFX7-SDAG-NEXT: v_cndmask_b32_e64 v3, 1.0, 0.5, vcc +; GFX7-SDAG-NEXT: v_cmp_eq_u32_e32 vcc, v2, v4 +; GFX7-SDAG-NEXT: v_cndmask_b32_e64 v2, 1.0, 0.5, vcc +; GFX7-SDAG-NEXT: v_mul_f32_e32 v0, v0, v2 +; GFX7-SDAG-NEXT: v_mul_f32_e32 v1, v1, v3 +; GFX7-SDAG-NEXT: s_setpc_b64 s[30:31] +; +; GFX7-GISEL-LABEL: fmul_select_v2f32_test4: +; GFX7-GISEL: ; %bb.0: +; GFX7-GISEL-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX7-GISEL-NEXT: v_cmp_eq_u32_e32 vcc, v2, v4 +; GFX7-GISEL-NEXT: v_cndmask_b32_e64 v2, 1.0, 0.5, vcc +; 
GFX7-GISEL-NEXT: v_cmp_eq_u32_e32 vcc, v3, v5 +; GFX7-GISEL-NEXT: v_cndmask_b32_e64 v3, 1.0, 0.5, vcc +; GFX7-GISEL-NEXT: v_mul_f32_e32 v0, v0, v2 +; GFX7-GISEL-NEXT: v_mul_f32_e32 v1, v1, v3 +; GFX7-GISEL-NEXT: s_setpc_b64 s[30:31] +; +; GFX9-SDAG-LABEL: fmul_select_v2f32_test4: +; GFX9-SDAG: ; %bb.0: +; GFX9-SDAG-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX9-SDAG-NEXT: v_cmp_eq_u32_e32 vcc, v3, v5 +; GFX9-SDAG-NEXT: v_cndmask_b32_e64 v3, 1.0, 0.5, vcc +; GFX9-SDAG-NEXT: v_cmp_eq_u32_e32 vcc, v2, v4 +; GFX9-SDAG-NEXT: v_cndmask_b32_e64 v2, 1.0, 0.5, vcc +; GFX9-SDAG-NEXT: v_mul_f32_e32 v0, v0, v2 +; GFX9-SDAG-NEXT: v_mul_f32_e32 v1, v1, v3 +; GFX9-SDAG-NEXT: s_setpc_b64 s[30:31] +; +; GFX9-GISEL-LABEL: fmul_select_v2f32_test4: +; GFX9-GISEL: ; %bb.0: +; GFX9-GISEL-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX9-GISEL-NEXT: v_cmp_eq_u32_e32 vcc, v2, v4 +; GFX9-GISEL-NEXT: v_cndmask_b32_e64 v2, 1.0, 0.5, vcc +; GFX9-GISEL-NEXT: v_cmp_eq_u32_e32 vcc, v3, v5 +; GFX9-GISEL-NEXT: v_cndmask_b32_e64 v3, 1.0, 0.5, vcc +; GFX9-GISEL-NEXT: v_mul_f32_e32 v0, v0, v2 +; GFX9-GISEL-NEXT: v_mul_f32_e32 v1, v1, v3 +; GFX9-GISEL-NEXT: s_setpc_b64 s[30:31] +; +; GFX10-LABEL: fmul_select_v2f32_test4: +; GFX10: ; %bb.0: +; GFX10-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX10-NEXT: v_cmp_eq_u32_e32 vcc_lo, v2, v4 +; GFX10-NEXT: v_cndmask_b32_e64 v2, 1.0, 0.5, vcc_lo +; GFX10-NEXT: v_cmp_eq_u32_e32 vcc_lo, v3, v5 +; GFX10-NEXT: v_mul_f32_e32 v0, v0, v2 +; GFX10-NEXT: v_cndmask_b32_e64 v3, 1.0, 0.5, vcc_lo +; GFX10-NEXT: v_mul_f32_e32 v1, v1, v3 +; GFX10-NEXT: s_setpc_b64 s[30:31] +; +; GFX11-LABEL: fmul_select_v2f32_test4: +; GFX11: ; %bb.0: +; GFX11-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX11-NEXT: v_cmp_eq_u32_e32 vcc_lo, v2, v4 +; GFX11-NEXT: v_cndmask_b32_e64 v2, 1.0, 0.5, vcc_lo +; GFX11-NEXT: v_cmp_eq_u32_e32 vcc_lo, v3, v5 +; GFX11-NEXT: v_cndmask_b32_e64 v3, 1.0, 0.5, vcc_lo +; GFX11-NEXT: s_delay_alu instid0(VALU_DEP_1) +; GFX11-NEXT: 
v_dual_mul_f32 v0, v0, v2 :: v_dual_mul_f32 v1, v1, v3 +; GFX11-NEXT: s_setpc_b64 s[30:31] %bool = icmp eq <2 x i32> %bool.arg1, %bool.arg2 %y = select <2 x i1> %bool, <2 x float> , <2 x float> %ldexp = fmul <2 x float> %x, %y @@ -199,22 +247,22 @@ define float @fmul_select_f32_test5(float %x, i32 %bool.arg1, i32 %bool.arg2) { ; GFX9-NEXT: v_mul_f32_e32 v0, v0, v1 ; GFX9-NEXT: s_setpc_b64 s[30:31] ; -; GFX1030-LABEL: fmul_select_f32_test5: -; GFX1030: ; %bb.0: -; GFX1030-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX1030-NEXT: v_cmp_eq_u32_e32 vcc_lo, v1, v2 -; GFX1030-NEXT: v_cndmask_b32_e64 v1, -1.0, -2.0, vcc_lo -; GFX1030-NEXT: v_mul_f32_e32 v0, v0, v1 -; GFX1030-NEXT: s_setpc_b64 s[30:31] -; -; GFX1100-LABEL: fmul_select_f32_test5: -; GFX1100: ; %bb.0: -; GFX1100-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX1100-NEXT: v_cmp_eq_u32_e32 vcc_lo, v1, v2 -; GFX1100-NEXT: v_cndmask_b32_e64 v1, -1.0, -2.0, vcc_lo -; GFX1100-NEXT: s_delay_alu instid0(VALU_DEP_1) -; GFX1100-NEXT: v_mul_f32_e32 v0, v0, v1 -; GFX1100-NEXT: s_setpc_b64 s[30:31] +; GFX10-LABEL: fmul_select_f32_test5: +; GFX10: ; %bb.0: +; GFX10-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX10-NEXT: v_cmp_eq_u32_e32 vcc_lo, v1, v2 +; GFX10-NEXT: v_cndmask_b32_e64 v1, -1.0, -2.0, vcc_lo +; GFX10-NEXT: v_mul_f32_e32 v0, v0, v1 +; GFX10-NEXT: s_setpc_b64 s[30:31] +; +; GFX11-LABEL: fmul_select_f32_test5: +; GFX11: ; %bb.0: +; GFX11-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX11-NEXT: v_cmp_eq_u32_e32 vcc_lo, v1, v2 +; GFX11-NEXT: v_cndmask_b32_e64 v1, -1.0, -2.0, vcc_lo +; GFX11-NEXT: s_delay_alu instid0(VALU_DEP_1) +; GFX11-NEXT: v_mul_f32_e32 v0, v0, v1 +; GFX11-NEXT: s_setpc_b64 s[30:31] %bool = icmp eq i32 %bool.arg1, %bool.arg2 %y = select i1 %bool, float -2.000000e+00, float -1.000000e+00 %ldexp = fmul float %x, %y @@ -222,44 +270,83 @@ define float @fmul_select_f32_test5(float %x, i32 %bool.arg1, i32 %bool.arg2) { } define float @fmul_select_f32_test6(float %x, i32 %bool.arg1, 
i32 %bool.arg2) { -; GFX7-LABEL: fmul_select_f32_test6: -; GFX7: ; %bb.0: -; GFX7-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX7-NEXT: v_mov_b32_e32 v3, 0x41000000 -; GFX7-NEXT: v_mov_b32_e32 v4, 0xc0400000 -; GFX7-NEXT: v_cmp_eq_u32_e32 vcc, v1, v2 -; GFX7-NEXT: v_cndmask_b32_e32 v1, v3, v4, vcc -; GFX7-NEXT: v_mul_f32_e32 v0, v0, v1 -; GFX7-NEXT: s_setpc_b64 s[30:31] -; -; GFX9-LABEL: fmul_select_f32_test6: -; GFX9: ; %bb.0: -; GFX9-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX9-NEXT: v_mov_b32_e32 v3, 0x41000000 -; GFX9-NEXT: v_mov_b32_e32 v4, 0xc0400000 -; GFX9-NEXT: v_cmp_eq_u32_e32 vcc, v1, v2 -; GFX9-NEXT: v_cndmask_b32_e32 v1, v3, v4, vcc -; GFX9-NEXT: v_mul_f32_e32 v0, v0, v1 -; GFX9-NEXT: s_setpc_b64 s[30:31] -; -; GFX1030-LABEL: fmul_select_f32_test6: -; GFX1030: ; %bb.0: -; GFX1030-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX1030-NEXT: v_mov_b32_e32 v3, 0xc0400000 -; GFX1030-NEXT: v_cmp_eq_u32_e32 vcc_lo, v1, v2 -; GFX1030-NEXT: v_cndmask_b32_e32 v1, 0x41000000, v3, vcc_lo -; GFX1030-NEXT: v_mul_f32_e32 v0, v0, v1 -; GFX1030-NEXT: s_setpc_b64 s[30:31] -; -; GFX1100-LABEL: fmul_select_f32_test6: -; GFX1100: ; %bb.0: -; GFX1100-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX1100-NEXT: v_mov_b32_e32 v3, 0xc0400000 -; GFX1100-NEXT: v_cmp_eq_u32_e32 vcc_lo, v1, v2 -; GFX1100-NEXT: s_delay_alu instid0(VALU_DEP_2) | instskip(NEXT) | instid1(VALU_DEP_1) -; GFX1100-NEXT: v_cndmask_b32_e32 v1, 0x41000000, v3, vcc_lo -; GFX1100-NEXT: v_mul_f32_e32 v0, v0, v1 -; GFX1100-NEXT: s_setpc_b64 s[30:31] +; GFX7-SDAG-LABEL: fmul_select_f32_test6: +; GFX7-SDAG: ; %bb.0: +; GFX7-SDAG-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX7-SDAG-NEXT: v_mov_b32_e32 v3, 0x41000000 +; GFX7-SDAG-NEXT: v_mov_b32_e32 v4, 0xc0400000 +; GFX7-SDAG-NEXT: v_cmp_eq_u32_e32 vcc, v1, v2 +; GFX7-SDAG-NEXT: v_cndmask_b32_e32 v1, v3, v4, vcc +; GFX7-SDAG-NEXT: v_mul_f32_e32 v0, v0, v1 +; GFX7-SDAG-NEXT: s_setpc_b64 s[30:31] +; +; GFX7-GISEL-LABEL: 
fmul_select_f32_test6: +; GFX7-GISEL: ; %bb.0: +; GFX7-GISEL-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX7-GISEL-NEXT: v_mov_b32_e32 v3, 0xc0400000 +; GFX7-GISEL-NEXT: v_mov_b32_e32 v4, 0x41000000 +; GFX7-GISEL-NEXT: v_cmp_eq_u32_e32 vcc, v1, v2 +; GFX7-GISEL-NEXT: v_cndmask_b32_e32 v1, v4, v3, vcc +; GFX7-GISEL-NEXT: v_mul_f32_e32 v0, v0, v1 +; GFX7-GISEL-NEXT: s_setpc_b64 s[30:31] +; +; GFX9-SDAG-LABEL: fmul_select_f32_test6: +; GFX9-SDAG: ; %bb.0: +; GFX9-SDAG-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX9-SDAG-NEXT: v_mov_b32_e32 v3, 0x41000000 +; GFX9-SDAG-NEXT: v_mov_b32_e32 v4, 0xc0400000 +; GFX9-SDAG-NEXT: v_cmp_eq_u32_e32 vcc, v1, v2 +; GFX9-SDAG-NEXT: v_cndmask_b32_e32 v1, v3, v4, vcc +; GFX9-SDAG-NEXT: v_mul_f32_e32 v0, v0, v1 +; GFX9-SDAG-NEXT: s_setpc_b64 s[30:31] +; +; GFX9-GISEL-LABEL: fmul_select_f32_test6: +; GFX9-GISEL: ; %bb.0: +; GFX9-GISEL-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX9-GISEL-NEXT: v_mov_b32_e32 v3, 0xc0400000 +; GFX9-GISEL-NEXT: v_mov_b32_e32 v4, 0x41000000 +; GFX9-GISEL-NEXT: v_cmp_eq_u32_e32 vcc, v1, v2 +; GFX9-GISEL-NEXT: v_cndmask_b32_e32 v1, v4, v3, vcc +; GFX9-GISEL-NEXT: v_mul_f32_e32 v0, v0, v1 +; GFX9-GISEL-NEXT: s_setpc_b64 s[30:31] +; +; GFX10-SDAG-LABEL: fmul_select_f32_test6: +; GFX10-SDAG: ; %bb.0: +; GFX10-SDAG-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX10-SDAG-NEXT: v_mov_b32_e32 v3, 0xc0400000 +; GFX10-SDAG-NEXT: v_cmp_eq_u32_e32 vcc_lo, v1, v2 +; GFX10-SDAG-NEXT: v_cndmask_b32_e32 v1, 0x41000000, v3, vcc_lo +; GFX10-SDAG-NEXT: v_mul_f32_e32 v0, v0, v1 +; GFX10-SDAG-NEXT: s_setpc_b64 s[30:31] +; +; GFX10-GISEL-LABEL: fmul_select_f32_test6: +; GFX10-GISEL: ; %bb.0: +; GFX10-GISEL-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX10-GISEL-NEXT: v_mov_b32_e32 v3, 0x41000000 +; GFX10-GISEL-NEXT: v_cmp_eq_u32_e32 vcc_lo, v1, v2 +; GFX10-GISEL-NEXT: v_cndmask_b32_e64 v1, v3, 0xc0400000, vcc_lo +; GFX10-GISEL-NEXT: v_mul_f32_e32 v0, v0, v1 +; GFX10-GISEL-NEXT: s_setpc_b64 s[30:31] +; 
+; GFX11-SDAG-LABEL: fmul_select_f32_test6: +; GFX11-SDAG: ; %bb.0: +; GFX11-SDAG-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX11-SDAG-NEXT: v_mov_b32_e32 v3, 0xc0400000 +; GFX11-SDAG-NEXT: v_cmp_eq_u32_e32 vcc_lo, v1, v2 +; GFX11-SDAG-NEXT: s_delay_alu instid0(VALU_DEP_2) | instskip(NEXT) | instid1(VALU_DEP_1) +; GFX11-SDAG-NEXT: v_cndmask_b32_e32 v1, 0x41000000, v3, vcc_lo +; GFX11-SDAG-NEXT: v_mul_f32_e32 v0, v0, v1 +; GFX11-SDAG-NEXT: s_setpc_b64 s[30:31] +; +; GFX11-GISEL-LABEL: fmul_select_f32_test6: +; GFX11-GISEL: ; %bb.0: +; GFX11-GISEL-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX11-GISEL-NEXT: v_mov_b32_e32 v3, 0x41000000 +; GFX11-GISEL-NEXT: v_cmp_eq_u32_e32 vcc_lo, v1, v2 +; GFX11-GISEL-NEXT: s_delay_alu instid0(VALU_DEP_2) | instskip(NEXT) | instid1(VALU_DEP_1) +; GFX11-GISEL-NEXT: v_cndmask_b32_e64 v1, v3, 0xc0400000, vcc_lo +; GFX11-GISEL-NEXT: v_mul_f32_e32 v0, v0, v1 +; GFX11-GISEL-NEXT: s_setpc_b64 s[30:31] %bool = icmp eq i32 %bool.arg1, %bool.arg2 %y = select i1 %bool, float -3.000000e+00, float 8.000000e+00 %ldexp = fmul float %x, %y @@ -285,22 +372,22 @@ define float @fmul_select_f32_test7_sel_log2val_pos59_pos92(float %x, i32 %bool. 
; GFX9-NEXT: v_ldexp_f32 v0, v0, v1 ; GFX9-NEXT: s_setpc_b64 s[30:31] ; -; GFX1030-LABEL: fmul_select_f32_test7_sel_log2val_pos59_pos92: -; GFX1030: ; %bb.0: -; GFX1030-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX1030-NEXT: v_cmp_eq_u32_e32 vcc_lo, v1, v2 -; GFX1030-NEXT: v_cndmask_b32_e64 v1, 0x5c, 59, vcc_lo -; GFX1030-NEXT: v_ldexp_f32 v0, v0, v1 -; GFX1030-NEXT: s_setpc_b64 s[30:31] -; -; GFX1100-LABEL: fmul_select_f32_test7_sel_log2val_pos59_pos92: -; GFX1100: ; %bb.0: -; GFX1100-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX1100-NEXT: v_cmp_eq_u32_e32 vcc_lo, v1, v2 -; GFX1100-NEXT: v_cndmask_b32_e64 v1, 0x5c, 59, vcc_lo -; GFX1100-NEXT: s_delay_alu instid0(VALU_DEP_1) -; GFX1100-NEXT: v_ldexp_f32 v0, v0, v1 -; GFX1100-NEXT: s_setpc_b64 s[30:31] +; GFX10-LABEL: fmul_select_f32_test7_sel_log2val_pos59_pos92: +; GFX10: ; %bb.0: +; GFX10-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX10-NEXT: v_cmp_eq_u32_e32 vcc_lo, v1, v2 +; GFX10-NEXT: v_cndmask_b32_e64 v1, 0x5c, 59, vcc_lo +; GFX10-NEXT: v_ldexp_f32 v0, v0, v1 +; GFX10-NEXT: s_setpc_b64 s[30:31] +; +; GFX11-LABEL: fmul_select_f32_test7_sel_log2val_pos59_pos92: +; GFX11: ; %bb.0: +; GFX11-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX11-NEXT: v_cmp_eq_u32_e32 vcc_lo, v1, v2 +; GFX11-NEXT: v_cndmask_b32_e64 v1, 0x5c, 59, vcc_lo +; GFX11-NEXT: s_delay_alu instid0(VALU_DEP_1) +; GFX11-NEXT: v_ldexp_f32 v0, v0, v1 +; GFX11-NEXT: s_setpc_b64 s[30:31] %bool = icmp eq i32 %bool.arg1, %bool.arg2 %y = select i1 %bool, float 0x43A0000000000000, float 0x45B0000000000000 %ldexp = fmul float %x, %y @@ -308,44 +395,83 @@ define float @fmul_select_f32_test7_sel_log2val_pos59_pos92(float %x, i32 %bool. 
} define float @fmul_select_f32_test8(float %x, i32 %bool.arg1, i32 %bool.arg2) { -; GFX7-LABEL: fmul_select_f32_test8: -; GFX7: ; %bb.0: -; GFX7-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX7-NEXT: v_mov_b32_e32 v3, 0xc1000000 -; GFX7-NEXT: v_mov_b32_e32 v4, 0x41800000 -; GFX7-NEXT: v_cmp_eq_u32_e32 vcc, v1, v2 -; GFX7-NEXT: v_cndmask_b32_e32 v1, v3, v4, vcc -; GFX7-NEXT: v_mul_f32_e32 v0, v0, v1 -; GFX7-NEXT: s_setpc_b64 s[30:31] -; -; GFX9-LABEL: fmul_select_f32_test8: -; GFX9: ; %bb.0: -; GFX9-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX9-NEXT: v_mov_b32_e32 v3, 0xc1000000 -; GFX9-NEXT: v_mov_b32_e32 v4, 0x41800000 -; GFX9-NEXT: v_cmp_eq_u32_e32 vcc, v1, v2 -; GFX9-NEXT: v_cndmask_b32_e32 v1, v3, v4, vcc -; GFX9-NEXT: v_mul_f32_e32 v0, v0, v1 -; GFX9-NEXT: s_setpc_b64 s[30:31] -; -; GFX1030-LABEL: fmul_select_f32_test8: -; GFX1030: ; %bb.0: -; GFX1030-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX1030-NEXT: v_mov_b32_e32 v3, 0x41800000 -; GFX1030-NEXT: v_cmp_eq_u32_e32 vcc_lo, v1, v2 -; GFX1030-NEXT: v_cndmask_b32_e32 v1, 0xc1000000, v3, vcc_lo -; GFX1030-NEXT: v_mul_f32_e32 v0, v0, v1 -; GFX1030-NEXT: s_setpc_b64 s[30:31] -; -; GFX1100-LABEL: fmul_select_f32_test8: -; GFX1100: ; %bb.0: -; GFX1100-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX1100-NEXT: v_mov_b32_e32 v3, 0x41800000 -; GFX1100-NEXT: v_cmp_eq_u32_e32 vcc_lo, v1, v2 -; GFX1100-NEXT: s_delay_alu instid0(VALU_DEP_2) | instskip(NEXT) | instid1(VALU_DEP_1) -; GFX1100-NEXT: v_cndmask_b32_e32 v1, 0xc1000000, v3, vcc_lo -; GFX1100-NEXT: v_mul_f32_e32 v0, v0, v1 -; GFX1100-NEXT: s_setpc_b64 s[30:31] +; GFX7-SDAG-LABEL: fmul_select_f32_test8: +; GFX7-SDAG: ; %bb.0: +; GFX7-SDAG-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX7-SDAG-NEXT: v_mov_b32_e32 v3, 0xc1000000 +; GFX7-SDAG-NEXT: v_mov_b32_e32 v4, 0x41800000 +; GFX7-SDAG-NEXT: v_cmp_eq_u32_e32 vcc, v1, v2 +; GFX7-SDAG-NEXT: v_cndmask_b32_e32 v1, v3, v4, vcc +; GFX7-SDAG-NEXT: v_mul_f32_e32 v0, v0, v1 +; GFX7-SDAG-NEXT: 
s_setpc_b64 s[30:31] +; +; GFX7-GISEL-LABEL: fmul_select_f32_test8: +; GFX7-GISEL: ; %bb.0: +; GFX7-GISEL-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX7-GISEL-NEXT: v_mov_b32_e32 v3, 0x41800000 +; GFX7-GISEL-NEXT: v_mov_b32_e32 v4, 0xc1000000 +; GFX7-GISEL-NEXT: v_cmp_eq_u32_e32 vcc, v1, v2 +; GFX7-GISEL-NEXT: v_cndmask_b32_e32 v1, v4, v3, vcc +; GFX7-GISEL-NEXT: v_mul_f32_e32 v0, v0, v1 +; GFX7-GISEL-NEXT: s_setpc_b64 s[30:31] +; +; GFX9-SDAG-LABEL: fmul_select_f32_test8: +; GFX9-SDAG: ; %bb.0: +; GFX9-SDAG-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX9-SDAG-NEXT: v_mov_b32_e32 v3, 0xc1000000 +; GFX9-SDAG-NEXT: v_mov_b32_e32 v4, 0x41800000 +; GFX9-SDAG-NEXT: v_cmp_eq_u32_e32 vcc, v1, v2 +; GFX9-SDAG-NEXT: v_cndmask_b32_e32 v1, v3, v4, vcc +; GFX9-SDAG-NEXT: v_mul_f32_e32 v0, v0, v1 +; GFX9-SDAG-NEXT: s_setpc_b64 s[30:31] +; +; GFX9-GISEL-LABEL: fmul_select_f32_test8: +; GFX9-GISEL: ; %bb.0: +; GFX9-GISEL-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX9-GISEL-NEXT: v_mov_b32_e32 v3, 0x41800000 +; GFX9-GISEL-NEXT: v_mov_b32_e32 v4, 0xc1000000 +; GFX9-GISEL-NEXT: v_cmp_eq_u32_e32 vcc, v1, v2 +; GFX9-GISEL-NEXT: v_cndmask_b32_e32 v1, v4, v3, vcc +; GFX9-GISEL-NEXT: v_mul_f32_e32 v0, v0, v1 +; GFX9-GISEL-NEXT: s_setpc_b64 s[30:31] +; +; GFX10-SDAG-LABEL: fmul_select_f32_test8: +; GFX10-SDAG: ; %bb.0: +; GFX10-SDAG-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX10-SDAG-NEXT: v_mov_b32_e32 v3, 0x41800000 +; GFX10-SDAG-NEXT: v_cmp_eq_u32_e32 vcc_lo, v1, v2 +; GFX10-SDAG-NEXT: v_cndmask_b32_e32 v1, 0xc1000000, v3, vcc_lo +; GFX10-SDAG-NEXT: v_mul_f32_e32 v0, v0, v1 +; GFX10-SDAG-NEXT: s_setpc_b64 s[30:31] +; +; GFX10-GISEL-LABEL: fmul_select_f32_test8: +; GFX10-GISEL: ; %bb.0: +; GFX10-GISEL-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX10-GISEL-NEXT: v_mov_b32_e32 v3, 0xc1000000 +; GFX10-GISEL-NEXT: v_cmp_eq_u32_e32 vcc_lo, v1, v2 +; GFX10-GISEL-NEXT: v_cndmask_b32_e64 v1, v3, 0x41800000, vcc_lo +; GFX10-GISEL-NEXT: v_mul_f32_e32 v0, v0, v1 
+; GFX10-GISEL-NEXT: s_setpc_b64 s[30:31] +; +; GFX11-SDAG-LABEL: fmul_select_f32_test8: +; GFX11-SDAG: ; %bb.0: +; GFX11-SDAG-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX11-SDAG-NEXT: v_mov_b32_e32 v3, 0x41800000 +; GFX11-SDAG-NEXT: v_cmp_eq_u32_e32 vcc_lo, v1, v2 +; GFX11-SDAG-NEXT: s_delay_alu instid0(VALU_DEP_2) | instskip(NEXT) | instid1(VALU_DEP_1) +; GFX11-SDAG-NEXT: v_cndmask_b32_e32 v1, 0xc1000000, v3, vcc_lo +; GFX11-SDAG-NEXT: v_mul_f32_e32 v0, v0, v1 +; GFX11-SDAG-NEXT: s_setpc_b64 s[30:31] +; +; GFX11-GISEL-LABEL: fmul_select_f32_test8: +; GFX11-GISEL: ; %bb.0: +; GFX11-GISEL-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX11-GISEL-NEXT: v_mov_b32_e32 v3, 0xc1000000 +; GFX11-GISEL-NEXT: v_cmp_eq_u32_e32 vcc_lo, v1, v2 +; GFX11-GISEL-NEXT: s_delay_alu instid0(VALU_DEP_2) | instskip(NEXT) | instid1(VALU_DEP_1) +; GFX11-GISEL-NEXT: v_cndmask_b32_e64 v1, v3, 0x41800000, vcc_lo +; GFX11-GISEL-NEXT: v_mul_f32_e32 v0, v0, v1 +; GFX11-GISEL-NEXT: s_setpc_b64 s[30:31] %bool = icmp eq i32 %bool.arg1, %bool.arg2 %y = select i1 %bool, float 1.600000e+01, float -8.000000e+00 %ldexp = fmul float %x, %y @@ -369,22 +495,22 @@ define float @fmul_select_f32_test9(float %x, i32 %bool.arg1, i32 %bool.arg2) { ; GFX9-NEXT: v_mul_f32_e32 v0, v0, v1 ; GFX9-NEXT: s_setpc_b64 s[30:31] ; -; GFX1030-LABEL: fmul_select_f32_test9: -; GFX1030: ; %bb.0: -; GFX1030-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX1030-NEXT: v_cmp_eq_u32_e32 vcc_lo, v1, v2 -; GFX1030-NEXT: v_cndmask_b32_e64 v1, 2.0, 0, vcc_lo -; GFX1030-NEXT: v_mul_f32_e32 v0, v0, v1 -; GFX1030-NEXT: s_setpc_b64 s[30:31] -; -; GFX1100-LABEL: fmul_select_f32_test9: -; GFX1100: ; %bb.0: -; GFX1100-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX1100-NEXT: v_cmp_eq_u32_e32 vcc_lo, v1, v2 -; GFX1100-NEXT: v_cndmask_b32_e64 v1, 2.0, 0, vcc_lo -; GFX1100-NEXT: s_delay_alu instid0(VALU_DEP_1) -; GFX1100-NEXT: v_mul_f32_e32 v0, v0, v1 -; GFX1100-NEXT: s_setpc_b64 s[30:31] +; GFX10-LABEL: 
fmul_select_f32_test9: +; GFX10: ; %bb.0: +; GFX10-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX10-NEXT: v_cmp_eq_u32_e32 vcc_lo, v1, v2 +; GFX10-NEXT: v_cndmask_b32_e64 v1, 2.0, 0, vcc_lo +; GFX10-NEXT: v_mul_f32_e32 v0, v0, v1 +; GFX10-NEXT: s_setpc_b64 s[30:31] +; +; GFX11-LABEL: fmul_select_f32_test9: +; GFX11: ; %bb.0: +; GFX11-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX11-NEXT: v_cmp_eq_u32_e32 vcc_lo, v1, v2 +; GFX11-NEXT: v_cndmask_b32_e64 v1, 2.0, 0, vcc_lo +; GFX11-NEXT: s_delay_alu instid0(VALU_DEP_1) +; GFX11-NEXT: v_mul_f32_e32 v0, v0, v1 +; GFX11-NEXT: s_setpc_b64 s[30:31] %bool = icmp eq i32 %bool.arg1, %bool.arg2 %y = select i1 %bool, float 0.000000e+00, float 2.000000e+00 %ldexp = fmul float %x, %y @@ -410,22 +536,22 @@ define float @fmul_select_f32_test10(float %x, i32 %bool.arg1, i32 %bool.arg2) { ; GFX9-NEXT: v_mul_f32_e32 v0, v0, v1 ; GFX9-NEXT: s_setpc_b64 s[30:31] ; -; GFX1030-LABEL: fmul_select_f32_test10: -; GFX1030: ; %bb.0: -; GFX1030-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX1030-NEXT: v_cmp_eq_u32_e32 vcc_lo, v1, v2 -; GFX1030-NEXT: v_cndmask_b32_e64 v1, 0, 0x80000000, vcc_lo -; GFX1030-NEXT: v_mul_f32_e32 v0, v0, v1 -; GFX1030-NEXT: s_setpc_b64 s[30:31] -; -; GFX1100-LABEL: fmul_select_f32_test10: -; GFX1100: ; %bb.0: -; GFX1100-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX1100-NEXT: v_cmp_eq_u32_e32 vcc_lo, v1, v2 -; GFX1100-NEXT: v_cndmask_b32_e64 v1, 0, 0x80000000, vcc_lo -; GFX1100-NEXT: s_delay_alu instid0(VALU_DEP_1) -; GFX1100-NEXT: v_mul_f32_e32 v0, v0, v1 -; GFX1100-NEXT: s_setpc_b64 s[30:31] +; GFX10-LABEL: fmul_select_f32_test10: +; GFX10: ; %bb.0: +; GFX10-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX10-NEXT: v_cmp_eq_u32_e32 vcc_lo, v1, v2 +; GFX10-NEXT: v_cndmask_b32_e64 v1, 0, 0x80000000, vcc_lo +; GFX10-NEXT: v_mul_f32_e32 v0, v0, v1 +; GFX10-NEXT: s_setpc_b64 s[30:31] +; +; GFX11-LABEL: fmul_select_f32_test10: +; GFX11: ; %bb.0: +; GFX11-NEXT: s_waitcnt vmcnt(0) expcnt(0) 
lgkmcnt(0) +; GFX11-NEXT: v_cmp_eq_u32_e32 vcc_lo, v1, v2 +; GFX11-NEXT: v_cndmask_b32_e64 v1, 0, 0x80000000, vcc_lo +; GFX11-NEXT: s_delay_alu instid0(VALU_DEP_1) +; GFX11-NEXT: v_mul_f32_e32 v0, v0, v1 +; GFX11-NEXT: s_setpc_b64 s[30:31] %bool = icmp eq i32 %bool.arg1, %bool.arg2 %y = select i1 %bool, float -0.000000e+00, float 0.000000e+00 %ldexp = fmul float %x, %y @@ -451,22 +577,22 @@ define float @fmul_select_f32_test11_sel_log2val_pos78_pos56(float %x, i32 %bool ; GFX9-NEXT: v_ldexp_f32 v0, -v0, v1 ; GFX9-NEXT: s_setpc_b64 s[30:31] ; -; GFX1030-LABEL: fmul_select_f32_test11_sel_log2val_pos78_pos56: -; GFX1030: ; %bb.0: -; GFX1030-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX1030-NEXT: v_cmp_eq_u32_e32 vcc_lo, v1, v2 -; GFX1030-NEXT: v_cndmask_b32_e64 v1, 56, 0x4e, vcc_lo -; GFX1030-NEXT: v_ldexp_f32 v0, -v0, v1 -; GFX1030-NEXT: s_setpc_b64 s[30:31] -; -; GFX1100-LABEL: fmul_select_f32_test11_sel_log2val_pos78_pos56: -; GFX1100: ; %bb.0: -; GFX1100-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX1100-NEXT: v_cmp_eq_u32_e32 vcc_lo, v1, v2 -; GFX1100-NEXT: v_cndmask_b32_e64 v1, 56, 0x4e, vcc_lo -; GFX1100-NEXT: s_delay_alu instid0(VALU_DEP_1) -; GFX1100-NEXT: v_ldexp_f32 v0, -v0, v1 -; GFX1100-NEXT: s_setpc_b64 s[30:31] +; GFX10-LABEL: fmul_select_f32_test11_sel_log2val_pos78_pos56: +; GFX10: ; %bb.0: +; GFX10-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX10-NEXT: v_cmp_eq_u32_e32 vcc_lo, v1, v2 +; GFX10-NEXT: v_cndmask_b32_e64 v1, 56, 0x4e, vcc_lo +; GFX10-NEXT: v_ldexp_f32 v0, -v0, v1 +; GFX10-NEXT: s_setpc_b64 s[30:31] +; +; GFX11-LABEL: fmul_select_f32_test11_sel_log2val_pos78_pos56: +; GFX11: ; %bb.0: +; GFX11-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX11-NEXT: v_cmp_eq_u32_e32 vcc_lo, v1, v2 +; GFX11-NEXT: v_cndmask_b32_e64 v1, 56, 0x4e, vcc_lo +; GFX11-NEXT: s_delay_alu instid0(VALU_DEP_1) +; GFX11-NEXT: v_ldexp_f32 v0, -v0, v1 +; GFX11-NEXT: s_setpc_b64 s[30:31] %bool = icmp eq i32 %bool.arg1, %bool.arg2 %y = select i1 %bool, 
float 0xC4D0000000000000, float 0xC370000000000000 %ldexp = fmul float %x, %y @@ -474,44 +600,83 @@ define float @fmul_select_f32_test11_sel_log2val_pos78_pos56(float %x, i32 %bool } define float @fmul_select_f32_test12_sel_log2val_neg48_pos68(float %x, i32 %bool.arg1, i32 %bool.arg2) { -; GFX7-LABEL: fmul_select_f32_test12_sel_log2val_neg48_pos68: -; GFX7: ; %bb.0: -; GFX7-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX7-NEXT: v_mov_b32_e32 v3, 0x44 -; GFX7-NEXT: v_not_b32_e32 v4, 47 -; GFX7-NEXT: v_cmp_eq_u32_e32 vcc, v1, v2 -; GFX7-NEXT: v_cndmask_b32_e32 v1, v3, v4, vcc -; GFX7-NEXT: v_ldexp_f32_e32 v0, v0, v1 -; GFX7-NEXT: s_setpc_b64 s[30:31] -; -; GFX9-LABEL: fmul_select_f32_test12_sel_log2val_neg48_pos68: -; GFX9: ; %bb.0: -; GFX9-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX9-NEXT: v_mov_b32_e32 v3, 0x44 -; GFX9-NEXT: v_not_b32_e32 v4, 47 -; GFX9-NEXT: v_cmp_eq_u32_e32 vcc, v1, v2 -; GFX9-NEXT: v_cndmask_b32_e32 v1, v3, v4, vcc -; GFX9-NEXT: v_ldexp_f32 v0, v0, v1 -; GFX9-NEXT: s_setpc_b64 s[30:31] -; -; GFX1030-LABEL: fmul_select_f32_test12_sel_log2val_neg48_pos68: -; GFX1030: ; %bb.0: -; GFX1030-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX1030-NEXT: v_not_b32_e32 v3, 47 -; GFX1030-NEXT: v_cmp_eq_u32_e32 vcc_lo, v1, v2 -; GFX1030-NEXT: v_cndmask_b32_e32 v1, 0x44, v3, vcc_lo -; GFX1030-NEXT: v_ldexp_f32 v0, v0, v1 -; GFX1030-NEXT: s_setpc_b64 s[30:31] -; -; GFX1100-LABEL: fmul_select_f32_test12_sel_log2val_neg48_pos68: -; GFX1100: ; %bb.0: -; GFX1100-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX1100-NEXT: v_not_b32_e32 v3, 47 -; GFX1100-NEXT: v_cmp_eq_u32_e32 vcc_lo, v1, v2 -; GFX1100-NEXT: s_delay_alu instid0(VALU_DEP_2) | instskip(NEXT) | instid1(VALU_DEP_1) -; GFX1100-NEXT: v_cndmask_b32_e32 v1, 0x44, v3, vcc_lo -; GFX1100-NEXT: v_ldexp_f32 v0, v0, v1 -; GFX1100-NEXT: s_setpc_b64 s[30:31] +; GFX7-SDAG-LABEL: fmul_select_f32_test12_sel_log2val_neg48_pos68: +; GFX7-SDAG: ; %bb.0: +; GFX7-SDAG-NEXT: s_waitcnt vmcnt(0) expcnt(0) 
lgkmcnt(0) +; GFX7-SDAG-NEXT: v_mov_b32_e32 v3, 0x44 +; GFX7-SDAG-NEXT: v_not_b32_e32 v4, 47 +; GFX7-SDAG-NEXT: v_cmp_eq_u32_e32 vcc, v1, v2 +; GFX7-SDAG-NEXT: v_cndmask_b32_e32 v1, v3, v4, vcc +; GFX7-SDAG-NEXT: v_ldexp_f32_e32 v0, v0, v1 +; GFX7-SDAG-NEXT: s_setpc_b64 s[30:31] +; +; GFX7-GISEL-LABEL: fmul_select_f32_test12_sel_log2val_neg48_pos68: +; GFX7-GISEL: ; %bb.0: +; GFX7-GISEL-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX7-GISEL-NEXT: v_not_b32_e32 v3, 47 +; GFX7-GISEL-NEXT: v_mov_b32_e32 v4, 0x44 +; GFX7-GISEL-NEXT: v_cmp_eq_u32_e32 vcc, v1, v2 +; GFX7-GISEL-NEXT: v_cndmask_b32_e32 v1, v4, v3, vcc +; GFX7-GISEL-NEXT: v_ldexp_f32_e32 v0, v0, v1 +; GFX7-GISEL-NEXT: s_setpc_b64 s[30:31] +; +; GFX9-SDAG-LABEL: fmul_select_f32_test12_sel_log2val_neg48_pos68: +; GFX9-SDAG: ; %bb.0: +; GFX9-SDAG-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX9-SDAG-NEXT: v_mov_b32_e32 v3, 0x44 +; GFX9-SDAG-NEXT: v_not_b32_e32 v4, 47 +; GFX9-SDAG-NEXT: v_cmp_eq_u32_e32 vcc, v1, v2 +; GFX9-SDAG-NEXT: v_cndmask_b32_e32 v1, v3, v4, vcc +; GFX9-SDAG-NEXT: v_ldexp_f32 v0, v0, v1 +; GFX9-SDAG-NEXT: s_setpc_b64 s[30:31] +; +; GFX9-GISEL-LABEL: fmul_select_f32_test12_sel_log2val_neg48_pos68: +; GFX9-GISEL: ; %bb.0: +; GFX9-GISEL-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX9-GISEL-NEXT: v_not_b32_e32 v3, 47 +; GFX9-GISEL-NEXT: v_mov_b32_e32 v4, 0x44 +; GFX9-GISEL-NEXT: v_cmp_eq_u32_e32 vcc, v1, v2 +; GFX9-GISEL-NEXT: v_cndmask_b32_e32 v1, v4, v3, vcc +; GFX9-GISEL-NEXT: v_ldexp_f32 v0, v0, v1 +; GFX9-GISEL-NEXT: s_setpc_b64 s[30:31] +; +; GFX10-SDAG-LABEL: fmul_select_f32_test12_sel_log2val_neg48_pos68: +; GFX10-SDAG: ; %bb.0: +; GFX10-SDAG-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX10-SDAG-NEXT: v_not_b32_e32 v3, 47 +; GFX10-SDAG-NEXT: v_cmp_eq_u32_e32 vcc_lo, v1, v2 +; GFX10-SDAG-NEXT: v_cndmask_b32_e32 v1, 0x44, v3, vcc_lo +; GFX10-SDAG-NEXT: v_ldexp_f32 v0, v0, v1 +; GFX10-SDAG-NEXT: s_setpc_b64 s[30:31] +; +; GFX10-GISEL-LABEL: 
fmul_select_f32_test12_sel_log2val_neg48_pos68: +; GFX10-GISEL: ; %bb.0: +; GFX10-GISEL-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX10-GISEL-NEXT: v_mov_b32_e32 v3, 0x44 +; GFX10-GISEL-NEXT: v_cmp_eq_u32_e32 vcc_lo, v1, v2 +; GFX10-GISEL-NEXT: v_cndmask_b32_e64 v1, v3, 0xffffffd0, vcc_lo +; GFX10-GISEL-NEXT: v_ldexp_f32 v0, v0, v1 +; GFX10-GISEL-NEXT: s_setpc_b64 s[30:31] +; +; GFX11-SDAG-LABEL: fmul_select_f32_test12_sel_log2val_neg48_pos68: +; GFX11-SDAG: ; %bb.0: +; GFX11-SDAG-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX11-SDAG-NEXT: v_not_b32_e32 v3, 47 +; GFX11-SDAG-NEXT: v_cmp_eq_u32_e32 vcc_lo, v1, v2 +; GFX11-SDAG-NEXT: s_delay_alu instid0(VALU_DEP_2) | instskip(NEXT) | instid1(VALU_DEP_1) +; GFX11-SDAG-NEXT: v_cndmask_b32_e32 v1, 0x44, v3, vcc_lo +; GFX11-SDAG-NEXT: v_ldexp_f32 v0, v0, v1 +; GFX11-SDAG-NEXT: s_setpc_b64 s[30:31] +; +; GFX11-GISEL-LABEL: fmul_select_f32_test12_sel_log2val_neg48_pos68: +; GFX11-GISEL: ; %bb.0: +; GFX11-GISEL-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX11-GISEL-NEXT: v_mov_b32_e32 v3, 0x44 +; GFX11-GISEL-NEXT: v_cmp_eq_u32_e32 vcc_lo, v1, v2 +; GFX11-GISEL-NEXT: s_delay_alu instid0(VALU_DEP_2) | instskip(NEXT) | instid1(VALU_DEP_1) +; GFX11-GISEL-NEXT: v_cndmask_b32_e64 v1, v3, 0xffffffd0, vcc_lo +; GFX11-GISEL-NEXT: v_ldexp_f32 v0, v0, v1 +; GFX11-GISEL-NEXT: s_setpc_b64 s[30:31] %bool = icmp eq i32 %bool.arg1, %bool.arg2 %y = select i1 %bool, float 0x3CF0000000000000, float 0x4430000000000000 %ldexp = fmul float %x, %y @@ -535,22 +700,22 @@ define double @fmul_select_f64_test1(double %x, i32 %bool.arg1, i32 %bool.arg2) ; GFX9-NEXT: v_ldexp_f64 v[0:1], v[0:1], v2 ; GFX9-NEXT: s_setpc_b64 s[30:31] ; -; GFX1030-LABEL: fmul_select_f64_test1: -; GFX1030: ; %bb.0: -; GFX1030-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX1030-NEXT: v_cmp_eq_u32_e32 vcc_lo, v2, v3 -; GFX1030-NEXT: v_cndmask_b32_e64 v2, 0, 1, vcc_lo -; GFX1030-NEXT: v_ldexp_f64 v[0:1], v[0:1], v2 -; GFX1030-NEXT: s_setpc_b64 s[30:31] -; 
-; GFX1100-LABEL: fmul_select_f64_test1: -; GFX1100: ; %bb.0: -; GFX1100-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX1100-NEXT: v_cmp_eq_u32_e32 vcc_lo, v2, v3 -; GFX1100-NEXT: v_cndmask_b32_e64 v2, 0, 1, vcc_lo -; GFX1100-NEXT: s_delay_alu instid0(VALU_DEP_1) -; GFX1100-NEXT: v_ldexp_f64 v[0:1], v[0:1], v2 -; GFX1100-NEXT: s_setpc_b64 s[30:31] +; GFX10-LABEL: fmul_select_f64_test1: +; GFX10: ; %bb.0: +; GFX10-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX10-NEXT: v_cmp_eq_u32_e32 vcc_lo, v2, v3 +; GFX10-NEXT: v_cndmask_b32_e64 v2, 0, 1, vcc_lo +; GFX10-NEXT: v_ldexp_f64 v[0:1], v[0:1], v2 +; GFX10-NEXT: s_setpc_b64 s[30:31] +; +; GFX11-LABEL: fmul_select_f64_test1: +; GFX11: ; %bb.0: +; GFX11-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX11-NEXT: v_cmp_eq_u32_e32 vcc_lo, v2, v3 +; GFX11-NEXT: v_cndmask_b32_e64 v2, 0, 1, vcc_lo +; GFX11-NEXT: s_delay_alu instid0(VALU_DEP_1) +; GFX11-NEXT: v_ldexp_f64 v[0:1], v[0:1], v2 +; GFX11-NEXT: s_setpc_b64 s[30:31] %bool = icmp eq i32 %bool.arg1, %bool.arg2 %y = select i1 %bool, double 2.000000e+00, double 1.000000e+00 %ldexp = fmul double %x, %y @@ -574,22 +739,22 @@ define double @fmul_select_f64_test2(double %x, i32 %bool.arg1, i32 %bool.arg2) ; GFX9-NEXT: v_ldexp_f64 v[0:1], v[0:1], v2 ; GFX9-NEXT: s_setpc_b64 s[30:31] ; -; GFX1030-LABEL: fmul_select_f64_test2: -; GFX1030: ; %bb.0: -; GFX1030-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX1030-NEXT: v_cmp_eq_u32_e32 vcc_lo, v2, v3 -; GFX1030-NEXT: v_cndmask_b32_e64 v2, 0, -1, vcc_lo -; GFX1030-NEXT: v_ldexp_f64 v[0:1], v[0:1], v2 -; GFX1030-NEXT: s_setpc_b64 s[30:31] -; -; GFX1100-LABEL: fmul_select_f64_test2: -; GFX1100: ; %bb.0: -; GFX1100-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX1100-NEXT: v_cmp_eq_u32_e32 vcc_lo, v2, v3 -; GFX1100-NEXT: v_cndmask_b32_e64 v2, 0, -1, vcc_lo -; GFX1100-NEXT: s_delay_alu instid0(VALU_DEP_1) -; GFX1100-NEXT: v_ldexp_f64 v[0:1], v[0:1], v2 -; GFX1100-NEXT: s_setpc_b64 s[30:31] +; GFX10-LABEL: 
fmul_select_f64_test2: +; GFX10: ; %bb.0: +; GFX10-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX10-NEXT: v_cmp_eq_u32_e32 vcc_lo, v2, v3 +; GFX10-NEXT: v_cndmask_b32_e64 v2, 0, -1, vcc_lo +; GFX10-NEXT: v_ldexp_f64 v[0:1], v[0:1], v2 +; GFX10-NEXT: s_setpc_b64 s[30:31] +; +; GFX11-LABEL: fmul_select_f64_test2: +; GFX11: ; %bb.0: +; GFX11-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX11-NEXT: v_cmp_eq_u32_e32 vcc_lo, v2, v3 +; GFX11-NEXT: v_cndmask_b32_e64 v2, 0, -1, vcc_lo +; GFX11-NEXT: s_delay_alu instid0(VALU_DEP_1) +; GFX11-NEXT: v_ldexp_f64 v[0:1], v[0:1], v2 +; GFX11-NEXT: s_setpc_b64 s[30:31] %bool = icmp eq i32 %bool.arg1, %bool.arg2 %y = select i1 %bool, double 5.000000e-01, double 1.000000e+00 %ldexp = fmul double %x, %y @@ -619,28 +784,28 @@ define <2 x double> @fmul_select_v2f64_test3(<2 x double> %x, <2 x i32> %bool.ar ; GFX9-NEXT: v_ldexp_f64 v[2:3], v[2:3], v4 ; GFX9-NEXT: s_setpc_b64 s[30:31] ; -; GFX1030-LABEL: fmul_select_v2f64_test3: -; GFX1030: ; %bb.0: -; GFX1030-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX1030-NEXT: v_cmp_eq_u32_e32 vcc_lo, v4, v6 -; GFX1030-NEXT: v_cndmask_b32_e64 v4, 0, 1, vcc_lo -; GFX1030-NEXT: v_cmp_eq_u32_e32 vcc_lo, v5, v7 -; GFX1030-NEXT: v_ldexp_f64 v[0:1], v[0:1], v4 -; GFX1030-NEXT: v_cndmask_b32_e64 v5, 0, 1, vcc_lo -; GFX1030-NEXT: v_ldexp_f64 v[2:3], v[2:3], v5 -; GFX1030-NEXT: s_setpc_b64 s[30:31] -; -; GFX1100-LABEL: fmul_select_v2f64_test3: -; GFX1100: ; %bb.0: -; GFX1100-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX1100-NEXT: v_cmp_eq_u32_e32 vcc_lo, v4, v6 -; GFX1100-NEXT: v_cndmask_b32_e64 v4, 0, 1, vcc_lo -; GFX1100-NEXT: v_cmp_eq_u32_e32 vcc_lo, v5, v7 -; GFX1100-NEXT: s_delay_alu instid0(VALU_DEP_2) | instskip(SKIP_1) | instid1(VALU_DEP_1) -; GFX1100-NEXT: v_ldexp_f64 v[0:1], v[0:1], v4 -; GFX1100-NEXT: v_cndmask_b32_e64 v5, 0, 1, vcc_lo -; GFX1100-NEXT: v_ldexp_f64 v[2:3], v[2:3], v5 -; GFX1100-NEXT: s_setpc_b64 s[30:31] +; GFX10-LABEL: fmul_select_v2f64_test3: +; GFX10: ; 
%bb.0: +; GFX10-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX10-NEXT: v_cmp_eq_u32_e32 vcc_lo, v4, v6 +; GFX10-NEXT: v_cndmask_b32_e64 v4, 0, 1, vcc_lo +; GFX10-NEXT: v_cmp_eq_u32_e32 vcc_lo, v5, v7 +; GFX10-NEXT: v_ldexp_f64 v[0:1], v[0:1], v4 +; GFX10-NEXT: v_cndmask_b32_e64 v5, 0, 1, vcc_lo +; GFX10-NEXT: v_ldexp_f64 v[2:3], v[2:3], v5 +; GFX10-NEXT: s_setpc_b64 s[30:31] +; +; GFX11-LABEL: fmul_select_v2f64_test3: +; GFX11: ; %bb.0: +; GFX11-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX11-NEXT: v_cmp_eq_u32_e32 vcc_lo, v4, v6 +; GFX11-NEXT: v_cndmask_b32_e64 v4, 0, 1, vcc_lo +; GFX11-NEXT: v_cmp_eq_u32_e32 vcc_lo, v5, v7 +; GFX11-NEXT: s_delay_alu instid0(VALU_DEP_2) | instskip(SKIP_1) | instid1(VALU_DEP_1) +; GFX11-NEXT: v_ldexp_f64 v[0:1], v[0:1], v4 +; GFX11-NEXT: v_cndmask_b32_e64 v5, 0, 1, vcc_lo +; GFX11-NEXT: v_ldexp_f64 v[2:3], v[2:3], v5 +; GFX11-NEXT: s_setpc_b64 s[30:31] %bool = icmp eq <2 x i32> %bool.arg1, %bool.arg2 %y = select <2 x i1> %bool, <2 x double> , <2 x double> %ldexp = fmul <2 x double> %x, %y @@ -670,28 +835,28 @@ define <2 x double> @fmul_select_v2f64_test4(<2 x double> %x, <2 x i32> %bool.ar ; GFX9-NEXT: v_ldexp_f64 v[2:3], v[2:3], v4 ; GFX9-NEXT: s_setpc_b64 s[30:31] ; -; GFX1030-LABEL: fmul_select_v2f64_test4: -; GFX1030: ; %bb.0: -; GFX1030-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX1030-NEXT: v_cmp_eq_u32_e32 vcc_lo, v4, v6 -; GFX1030-NEXT: v_cndmask_b32_e64 v4, 0, -1, vcc_lo -; GFX1030-NEXT: v_cmp_eq_u32_e32 vcc_lo, v5, v7 -; GFX1030-NEXT: v_ldexp_f64 v[0:1], v[0:1], v4 -; GFX1030-NEXT: v_cndmask_b32_e64 v5, 0, -1, vcc_lo -; GFX1030-NEXT: v_ldexp_f64 v[2:3], v[2:3], v5 -; GFX1030-NEXT: s_setpc_b64 s[30:31] -; -; GFX1100-LABEL: fmul_select_v2f64_test4: -; GFX1100: ; %bb.0: -; GFX1100-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX1100-NEXT: v_cmp_eq_u32_e32 vcc_lo, v4, v6 -; GFX1100-NEXT: v_cndmask_b32_e64 v4, 0, -1, vcc_lo -; GFX1100-NEXT: v_cmp_eq_u32_e32 vcc_lo, v5, v7 -; GFX1100-NEXT: s_delay_alu 
instid0(VALU_DEP_2) | instskip(SKIP_1) | instid1(VALU_DEP_1) -; GFX1100-NEXT: v_ldexp_f64 v[0:1], v[0:1], v4 -; GFX1100-NEXT: v_cndmask_b32_e64 v5, 0, -1, vcc_lo -; GFX1100-NEXT: v_ldexp_f64 v[2:3], v[2:3], v5 -; GFX1100-NEXT: s_setpc_b64 s[30:31] +; GFX10-LABEL: fmul_select_v2f64_test4: +; GFX10: ; %bb.0: +; GFX10-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX10-NEXT: v_cmp_eq_u32_e32 vcc_lo, v4, v6 +; GFX10-NEXT: v_cndmask_b32_e64 v4, 0, -1, vcc_lo +; GFX10-NEXT: v_cmp_eq_u32_e32 vcc_lo, v5, v7 +; GFX10-NEXT: v_ldexp_f64 v[0:1], v[0:1], v4 +; GFX10-NEXT: v_cndmask_b32_e64 v5, 0, -1, vcc_lo +; GFX10-NEXT: v_ldexp_f64 v[2:3], v[2:3], v5 +; GFX10-NEXT: s_setpc_b64 s[30:31] +; +; GFX11-LABEL: fmul_select_v2f64_test4: +; GFX11: ; %bb.0: +; GFX11-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX11-NEXT: v_cmp_eq_u32_e32 vcc_lo, v4, v6 +; GFX11-NEXT: v_cndmask_b32_e64 v4, 0, -1, vcc_lo +; GFX11-NEXT: v_cmp_eq_u32_e32 vcc_lo, v5, v7 +; GFX11-NEXT: s_delay_alu instid0(VALU_DEP_2) | instskip(SKIP_1) | instid1(VALU_DEP_1) +; GFX11-NEXT: v_ldexp_f64 v[0:1], v[0:1], v4 +; GFX11-NEXT: v_cndmask_b32_e64 v5, 0, -1, vcc_lo +; GFX11-NEXT: v_ldexp_f64 v[2:3], v[2:3], v5 +; GFX11-NEXT: s_setpc_b64 s[30:31] %bool = icmp eq <2 x i32> %bool.arg1, %bool.arg2 %y = select <2 x i1> %bool, <2 x double> , <2 x double> %ldexp = fmul <2 x double> %x, %y @@ -715,22 +880,22 @@ define double @fmul_select_f64_test5(double %x, i32 %bool.arg1, i32 %bool.arg2) ; GFX9-NEXT: v_ldexp_f64 v[0:1], -v[0:1], v2 ; GFX9-NEXT: s_setpc_b64 s[30:31] ; -; GFX1030-LABEL: fmul_select_f64_test5: -; GFX1030: ; %bb.0: -; GFX1030-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX1030-NEXT: v_cmp_eq_u32_e32 vcc_lo, v2, v3 -; GFX1030-NEXT: v_cndmask_b32_e64 v2, 0, -1, vcc_lo -; GFX1030-NEXT: v_ldexp_f64 v[0:1], -v[0:1], v2 -; GFX1030-NEXT: s_setpc_b64 s[30:31] -; -; GFX1100-LABEL: fmul_select_f64_test5: -; GFX1100: ; %bb.0: -; GFX1100-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX1100-NEXT: 
v_cmp_eq_u32_e32 vcc_lo, v2, v3 -; GFX1100-NEXT: v_cndmask_b32_e64 v2, 0, -1, vcc_lo -; GFX1100-NEXT: s_delay_alu instid0(VALU_DEP_1) -; GFX1100-NEXT: v_ldexp_f64 v[0:1], -v[0:1], v2 -; GFX1100-NEXT: s_setpc_b64 s[30:31] +; GFX10-LABEL: fmul_select_f64_test5: +; GFX10: ; %bb.0: +; GFX10-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX10-NEXT: v_cmp_eq_u32_e32 vcc_lo, v2, v3 +; GFX10-NEXT: v_cndmask_b32_e64 v2, 0, -1, vcc_lo +; GFX10-NEXT: v_ldexp_f64 v[0:1], -v[0:1], v2 +; GFX10-NEXT: s_setpc_b64 s[30:31] +; +; GFX11-LABEL: fmul_select_f64_test5: +; GFX11: ; %bb.0: +; GFX11-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX11-NEXT: v_cmp_eq_u32_e32 vcc_lo, v2, v3 +; GFX11-NEXT: v_cndmask_b32_e64 v2, 0, -1, vcc_lo +; GFX11-NEXT: s_delay_alu instid0(VALU_DEP_1) +; GFX11-NEXT: v_ldexp_f64 v[0:1], -v[0:1], v2 +; GFX11-NEXT: s_setpc_b64 s[30:31] %bool = icmp eq i32 %bool.arg1, %bool.arg2 %y = select i1 %bool, double -5.000000e-01, double -1.000000e+00 %ldexp = fmul double %x, %y @@ -754,22 +919,22 @@ define double @fmul_select_f64_test6(double %x, i32 %bool.arg1, i32 %bool.arg2) ; GFX9-NEXT: v_ldexp_f64 v[0:1], -v[0:1], v2 ; GFX9-NEXT: s_setpc_b64 s[30:31] ; -; GFX1030-LABEL: fmul_select_f64_test6: -; GFX1030: ; %bb.0: -; GFX1030-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX1030-NEXT: v_cmp_eq_u32_e32 vcc_lo, v2, v3 -; GFX1030-NEXT: v_cndmask_b32_e64 v2, 0, 1, vcc_lo -; GFX1030-NEXT: v_ldexp_f64 v[0:1], -v[0:1], v2 -; GFX1030-NEXT: s_setpc_b64 s[30:31] -; -; GFX1100-LABEL: fmul_select_f64_test6: -; GFX1100: ; %bb.0: -; GFX1100-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX1100-NEXT: v_cmp_eq_u32_e32 vcc_lo, v2, v3 -; GFX1100-NEXT: v_cndmask_b32_e64 v2, 0, 1, vcc_lo -; GFX1100-NEXT: s_delay_alu instid0(VALU_DEP_1) -; GFX1100-NEXT: v_ldexp_f64 v[0:1], -v[0:1], v2 -; GFX1100-NEXT: s_setpc_b64 s[30:31] +; GFX10-LABEL: fmul_select_f64_test6: +; GFX10: ; %bb.0: +; GFX10-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX10-NEXT: v_cmp_eq_u32_e32 vcc_lo, 
v2, v3 +; GFX10-NEXT: v_cndmask_b32_e64 v2, 0, 1, vcc_lo +; GFX10-NEXT: v_ldexp_f64 v[0:1], -v[0:1], v2 +; GFX10-NEXT: s_setpc_b64 s[30:31] +; +; GFX11-LABEL: fmul_select_f64_test6: +; GFX11: ; %bb.0: +; GFX11-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX11-NEXT: v_cmp_eq_u32_e32 vcc_lo, v2, v3 +; GFX11-NEXT: v_cndmask_b32_e64 v2, 0, 1, vcc_lo +; GFX11-NEXT: s_delay_alu instid0(VALU_DEP_1) +; GFX11-NEXT: v_ldexp_f64 v[0:1], -v[0:1], v2 +; GFX11-NEXT: s_setpc_b64 s[30:31] %bool = icmp eq i32 %bool.arg1, %bool.arg2 %y = select i1 %bool, double -2.000000e+00, double -1.000000e+00 %ldexp = fmul double %x, %y @@ -777,44 +942,64 @@ define double @fmul_select_f64_test6(double %x, i32 %bool.arg1, i32 %bool.arg2) } define double @fmul_select_f64_test7(double %x, i32 %bool.arg1, i32 %bool.arg2) { -; GFX7-LABEL: fmul_select_f64_test7: -; GFX7: ; %bb.0: -; GFX7-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX7-NEXT: v_mov_b32_e32 v4, 0xbff00000 -; GFX7-NEXT: v_cmp_eq_u32_e32 vcc, v2, v3 -; GFX7-NEXT: v_cndmask_b32_e64 v3, v4, 2.0, vcc -; GFX7-NEXT: v_mov_b32_e32 v2, 0 -; GFX7-NEXT: v_mul_f64 v[0:1], v[0:1], v[2:3] -; GFX7-NEXT: s_setpc_b64 s[30:31] -; -; GFX9-LABEL: fmul_select_f64_test7: -; GFX9: ; %bb.0: -; GFX9-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX9-NEXT: v_mov_b32_e32 v4, 0xbff00000 -; GFX9-NEXT: v_cmp_eq_u32_e32 vcc, v2, v3 -; GFX9-NEXT: v_cndmask_b32_e64 v3, v4, 2.0, vcc -; GFX9-NEXT: v_mov_b32_e32 v2, 0 -; GFX9-NEXT: v_mul_f64 v[0:1], v[0:1], v[2:3] -; GFX9-NEXT: s_setpc_b64 s[30:31] -; -; GFX1030-LABEL: fmul_select_f64_test7: -; GFX1030: ; %bb.0: -; GFX1030-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX1030-NEXT: v_cmp_eq_u32_e32 vcc_lo, v2, v3 -; GFX1030-NEXT: v_mov_b32_e32 v4, 0 -; GFX1030-NEXT: v_cndmask_b32_e64 v5, 0xbff00000, 2.0, vcc_lo -; GFX1030-NEXT: v_mul_f64 v[0:1], v[0:1], v[4:5] -; GFX1030-NEXT: s_setpc_b64 s[30:31] -; -; GFX1100-LABEL: fmul_select_f64_test7: -; GFX1100: ; %bb.0: -; GFX1100-NEXT: s_waitcnt vmcnt(0) expcnt(0) 
lgkmcnt(0) -; GFX1100-NEXT: v_cmp_eq_u32_e32 vcc_lo, v2, v3 -; GFX1100-NEXT: v_mov_b32_e32 v4, 0 -; GFX1100-NEXT: v_cndmask_b32_e64 v5, 0xbff00000, 2.0, vcc_lo -; GFX1100-NEXT: s_delay_alu instid0(VALU_DEP_1) -; GFX1100-NEXT: v_mul_f64 v[0:1], v[0:1], v[4:5] -; GFX1100-NEXT: s_setpc_b64 s[30:31] +; GFX7-SDAG-LABEL: fmul_select_f64_test7: +; GFX7-SDAG: ; %bb.0: +; GFX7-SDAG-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX7-SDAG-NEXT: v_mov_b32_e32 v4, 0xbff00000 +; GFX7-SDAG-NEXT: v_cmp_eq_u32_e32 vcc, v2, v3 +; GFX7-SDAG-NEXT: v_cndmask_b32_e64 v3, v4, 2.0, vcc +; GFX7-SDAG-NEXT: v_mov_b32_e32 v2, 0 +; GFX7-SDAG-NEXT: v_mul_f64 v[0:1], v[0:1], v[2:3] +; GFX7-SDAG-NEXT: s_setpc_b64 s[30:31] +; +; GFX7-GISEL-LABEL: fmul_select_f64_test7: +; GFX7-GISEL: ; %bb.0: +; GFX7-GISEL-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX7-GISEL-NEXT: v_mov_b32_e32 v5, 0xbff00000 +; GFX7-GISEL-NEXT: v_cmp_eq_u32_e32 vcc, v2, v3 +; GFX7-GISEL-NEXT: v_mov_b32_e32 v4, 0 +; GFX7-GISEL-NEXT: v_cndmask_b32_e64 v5, v5, 2.0, vcc +; GFX7-GISEL-NEXT: v_mul_f64 v[0:1], v[0:1], v[4:5] +; GFX7-GISEL-NEXT: s_setpc_b64 s[30:31] +; +; GFX9-SDAG-LABEL: fmul_select_f64_test7: +; GFX9-SDAG: ; %bb.0: +; GFX9-SDAG-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX9-SDAG-NEXT: v_mov_b32_e32 v4, 0xbff00000 +; GFX9-SDAG-NEXT: v_cmp_eq_u32_e32 vcc, v2, v3 +; GFX9-SDAG-NEXT: v_cndmask_b32_e64 v3, v4, 2.0, vcc +; GFX9-SDAG-NEXT: v_mov_b32_e32 v2, 0 +; GFX9-SDAG-NEXT: v_mul_f64 v[0:1], v[0:1], v[2:3] +; GFX9-SDAG-NEXT: s_setpc_b64 s[30:31] +; +; GFX9-GISEL-LABEL: fmul_select_f64_test7: +; GFX9-GISEL: ; %bb.0: +; GFX9-GISEL-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX9-GISEL-NEXT: v_mov_b32_e32 v5, 0xbff00000 +; GFX9-GISEL-NEXT: v_cmp_eq_u32_e32 vcc, v2, v3 +; GFX9-GISEL-NEXT: v_mov_b32_e32 v4, 0 +; GFX9-GISEL-NEXT: v_cndmask_b32_e64 v5, v5, 2.0, vcc +; GFX9-GISEL-NEXT: v_mul_f64 v[0:1], v[0:1], v[4:5] +; GFX9-GISEL-NEXT: s_setpc_b64 s[30:31] +; +; GFX10-LABEL: fmul_select_f64_test7: +; GFX10: 
; %bb.0: +; GFX10-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX10-NEXT: v_cmp_eq_u32_e32 vcc_lo, v2, v3 +; GFX10-NEXT: v_mov_b32_e32 v4, 0 +; GFX10-NEXT: v_cndmask_b32_e64 v5, 0xbff00000, 2.0, vcc_lo +; GFX10-NEXT: v_mul_f64 v[0:1], v[0:1], v[4:5] +; GFX10-NEXT: s_setpc_b64 s[30:31] +; +; GFX11-LABEL: fmul_select_f64_test7: +; GFX11: ; %bb.0: +; GFX11-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX11-NEXT: v_cmp_eq_u32_e32 vcc_lo, v2, v3 +; GFX11-NEXT: v_mov_b32_e32 v4, 0 +; GFX11-NEXT: v_cndmask_b32_e64 v5, 0xbff00000, 2.0, vcc_lo +; GFX11-NEXT: s_delay_alu instid0(VALU_DEP_1) +; GFX11-NEXT: v_mul_f64 v[0:1], v[0:1], v[4:5] +; GFX11-NEXT: s_setpc_b64 s[30:31] %bool = icmp eq i32 %bool.arg1, %bool.arg2 %y = select i1 %bool, double 2.000000e+00, double -1.000000e+00 %ldexp = fmul double %x, %y @@ -838,22 +1023,22 @@ define double @fmul_select_f64_test8(double %x, i32 %bool.arg1, i32 %bool.arg2) ; GFX9-NEXT: v_ldexp_f64 v[0:1], -v[0:1], v2 ; GFX9-NEXT: s_setpc_b64 s[30:31] ; -; GFX1030-LABEL: fmul_select_f64_test8: -; GFX1030: ; %bb.0: -; GFX1030-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX1030-NEXT: v_cmp_eq_u32_e32 vcc_lo, v2, v3 -; GFX1030-NEXT: v_cndmask_b32_e64 v2, 5, 2, vcc_lo -; GFX1030-NEXT: v_ldexp_f64 v[0:1], -v[0:1], v2 -; GFX1030-NEXT: s_setpc_b64 s[30:31] -; -; GFX1100-LABEL: fmul_select_f64_test8: -; GFX1100: ; %bb.0: -; GFX1100-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX1100-NEXT: v_cmp_eq_u32_e32 vcc_lo, v2, v3 -; GFX1100-NEXT: v_cndmask_b32_e64 v2, 5, 2, vcc_lo -; GFX1100-NEXT: s_delay_alu instid0(VALU_DEP_1) -; GFX1100-NEXT: v_ldexp_f64 v[0:1], -v[0:1], v2 -; GFX1100-NEXT: s_setpc_b64 s[30:31] +; GFX10-LABEL: fmul_select_f64_test8: +; GFX10: ; %bb.0: +; GFX10-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX10-NEXT: v_cmp_eq_u32_e32 vcc_lo, v2, v3 +; GFX10-NEXT: v_cndmask_b32_e64 v2, 5, 2, vcc_lo +; GFX10-NEXT: v_ldexp_f64 v[0:1], -v[0:1], v2 +; GFX10-NEXT: s_setpc_b64 s[30:31] +; +; GFX11-LABEL: 
fmul_select_f64_test8: +; GFX11: ; %bb.0: +; GFX11-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX11-NEXT: v_cmp_eq_u32_e32 vcc_lo, v2, v3 +; GFX11-NEXT: v_cndmask_b32_e64 v2, 5, 2, vcc_lo +; GFX11-NEXT: s_delay_alu instid0(VALU_DEP_1) +; GFX11-NEXT: v_ldexp_f64 v[0:1], -v[0:1], v2 +; GFX11-NEXT: s_setpc_b64 s[30:31] %bool = icmp eq i32 %bool.arg1, %bool.arg2 %y = select i1 %bool, double -4.000000e+00, double -3.200000e+01 %ldexp = fmul double %x, %y @@ -883,28 +1068,28 @@ define <2 x double> @fmul_select_v2f64_test9(<2 x double> %x, <2 x i32> %bool.ar ; GFX9-NEXT: v_ldexp_f64 v[2:3], -v[2:3], v4 ; GFX9-NEXT: s_setpc_b64 s[30:31] ; -; GFX1030-LABEL: fmul_select_v2f64_test9: -; GFX1030: ; %bb.0: -; GFX1030-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX1030-NEXT: v_cmp_eq_u32_e32 vcc_lo, v4, v6 -; GFX1030-NEXT: v_cndmask_b32_e64 v4, 0, 1, vcc_lo -; GFX1030-NEXT: v_cmp_eq_u32_e32 vcc_lo, v5, v7 -; GFX1030-NEXT: v_ldexp_f64 v[0:1], -v[0:1], v4 -; GFX1030-NEXT: v_cndmask_b32_e64 v5, 0, 1, vcc_lo -; GFX1030-NEXT: v_ldexp_f64 v[2:3], -v[2:3], v5 -; GFX1030-NEXT: s_setpc_b64 s[30:31] -; -; GFX1100-LABEL: fmul_select_v2f64_test9: -; GFX1100: ; %bb.0: -; GFX1100-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX1100-NEXT: v_cmp_eq_u32_e32 vcc_lo, v4, v6 -; GFX1100-NEXT: v_cndmask_b32_e64 v4, 0, 1, vcc_lo -; GFX1100-NEXT: v_cmp_eq_u32_e32 vcc_lo, v5, v7 -; GFX1100-NEXT: s_delay_alu instid0(VALU_DEP_2) | instskip(SKIP_1) | instid1(VALU_DEP_1) -; GFX1100-NEXT: v_ldexp_f64 v[0:1], -v[0:1], v4 -; GFX1100-NEXT: v_cndmask_b32_e64 v5, 0, 1, vcc_lo -; GFX1100-NEXT: v_ldexp_f64 v[2:3], -v[2:3], v5 -; GFX1100-NEXT: s_setpc_b64 s[30:31] +; GFX10-LABEL: fmul_select_v2f64_test9: +; GFX10: ; %bb.0: +; GFX10-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX10-NEXT: v_cmp_eq_u32_e32 vcc_lo, v4, v6 +; GFX10-NEXT: v_cndmask_b32_e64 v4, 0, 1, vcc_lo +; GFX10-NEXT: v_cmp_eq_u32_e32 vcc_lo, v5, v7 +; GFX10-NEXT: v_ldexp_f64 v[0:1], -v[0:1], v4 +; GFX10-NEXT: v_cndmask_b32_e64 v5, 
0, 1, vcc_lo +; GFX10-NEXT: v_ldexp_f64 v[2:3], -v[2:3], v5 +; GFX10-NEXT: s_setpc_b64 s[30:31] +; +; GFX11-LABEL: fmul_select_v2f64_test9: +; GFX11: ; %bb.0: +; GFX11-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX11-NEXT: v_cmp_eq_u32_e32 vcc_lo, v4, v6 +; GFX11-NEXT: v_cndmask_b32_e64 v4, 0, 1, vcc_lo +; GFX11-NEXT: v_cmp_eq_u32_e32 vcc_lo, v5, v7 +; GFX11-NEXT: s_delay_alu instid0(VALU_DEP_2) | instskip(SKIP_1) | instid1(VALU_DEP_1) +; GFX11-NEXT: v_ldexp_f64 v[0:1], -v[0:1], v4 +; GFX11-NEXT: v_cndmask_b32_e64 v5, 0, 1, vcc_lo +; GFX11-NEXT: v_ldexp_f64 v[2:3], -v[2:3], v5 +; GFX11-NEXT: s_setpc_b64 s[30:31] %bool = icmp eq <2 x i32> %bool.arg1, %bool.arg2 %y = select <2 x i1> %bool, <2 x double> , <2 x double> %ldexp = fmul <2 x double> %x, %y @@ -912,60 +1097,115 @@ define <2 x double> @fmul_select_v2f64_test9(<2 x double> %x, <2 x i32> %bool.ar } define <2 x double> @fmul_select_v2f64_test10(<2 x double> %x, <2 x i32> %bool.arg1, <2 x i32> %bool.arg2) { -; GFX7-LABEL: fmul_select_v2f64_test10: -; GFX7: ; %bb.0: -; GFX7-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX7-NEXT: v_mov_b32_e32 v8, 0xbff00000 -; GFX7-NEXT: v_mov_b32_e32 v9, 0x3fe00000 -; GFX7-NEXT: v_cmp_eq_u32_e32 vcc, v4, v6 -; GFX7-NEXT: v_cndmask_b32_e32 v9, v8, v9, vcc -; GFX7-NEXT: v_cmp_eq_u32_e32 vcc, v5, v7 -; GFX7-NEXT: v_mov_b32_e32 v8, 0 -; GFX7-NEXT: v_cndmask_b32_e64 v4, 0, 1, vcc -; GFX7-NEXT: v_mul_f64 v[0:1], v[0:1], v[8:9] -; GFX7-NEXT: v_ldexp_f64 v[2:3], v[2:3], v4 -; GFX7-NEXT: s_setpc_b64 s[30:31] -; -; GFX9-LABEL: fmul_select_v2f64_test10: -; GFX9: ; %bb.0: -; GFX9-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX9-NEXT: v_mov_b32_e32 v8, 0xbff00000 -; GFX9-NEXT: v_mov_b32_e32 v9, 0x3fe00000 -; GFX9-NEXT: v_cmp_eq_u32_e32 vcc, v4, v6 -; GFX9-NEXT: v_cndmask_b32_e32 v9, v8, v9, vcc -; GFX9-NEXT: v_cmp_eq_u32_e32 vcc, v5, v7 -; GFX9-NEXT: v_mov_b32_e32 v8, 0 -; GFX9-NEXT: v_cndmask_b32_e64 v4, 0, 1, vcc -; GFX9-NEXT: v_mul_f64 v[0:1], v[0:1], v[8:9] -; GFX9-NEXT: 
v_ldexp_f64 v[2:3], v[2:3], v4 -; GFX9-NEXT: s_setpc_b64 s[30:31] -; -; GFX1030-LABEL: fmul_select_v2f64_test10: -; GFX1030: ; %bb.0: -; GFX1030-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX1030-NEXT: v_mov_b32_e32 v8, 0x3fe00000 -; GFX1030-NEXT: v_cmp_eq_u32_e32 vcc_lo, v4, v6 -; GFX1030-NEXT: v_cndmask_b32_e32 v9, 0xbff00000, v8, vcc_lo -; GFX1030-NEXT: v_cmp_eq_u32_e32 vcc_lo, v5, v7 -; GFX1030-NEXT: v_mov_b32_e32 v8, 0 -; GFX1030-NEXT: v_cndmask_b32_e64 v4, 0, 1, vcc_lo -; GFX1030-NEXT: v_mul_f64 v[0:1], v[0:1], v[8:9] -; GFX1030-NEXT: v_ldexp_f64 v[2:3], v[2:3], v4 -; GFX1030-NEXT: s_setpc_b64 s[30:31] -; -; GFX1100-LABEL: fmul_select_v2f64_test10: -; GFX1100: ; %bb.0: -; GFX1100-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX1100-NEXT: v_mov_b32_e32 v8, 0x3fe00000 -; GFX1100-NEXT: v_cmp_eq_u32_e32 vcc_lo, v4, v6 -; GFX1100-NEXT: s_delay_alu instid0(VALU_DEP_2) | instskip(SKIP_1) | instid1(VALU_DEP_2) -; GFX1100-NEXT: v_dual_cndmask_b32 v9, 0xbff00000, v8 :: v_dual_mov_b32 v8, 0 -; GFX1100-NEXT: v_cmp_eq_u32_e32 vcc_lo, v5, v7 -; GFX1100-NEXT: v_mul_f64 v[0:1], v[0:1], v[8:9] -; GFX1100-NEXT: v_cndmask_b32_e64 v4, 0, 1, vcc_lo -; GFX1100-NEXT: s_delay_alu instid0(VALU_DEP_1) -; GFX1100-NEXT: v_ldexp_f64 v[2:3], v[2:3], v4 -; GFX1100-NEXT: s_setpc_b64 s[30:31] +; GFX7-SDAG-LABEL: fmul_select_v2f64_test10: +; GFX7-SDAG: ; %bb.0: +; GFX7-SDAG-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX7-SDAG-NEXT: v_mov_b32_e32 v8, 0xbff00000 +; GFX7-SDAG-NEXT: v_mov_b32_e32 v9, 0x3fe00000 +; GFX7-SDAG-NEXT: v_cmp_eq_u32_e32 vcc, v4, v6 +; GFX7-SDAG-NEXT: v_cndmask_b32_e32 v9, v8, v9, vcc +; GFX7-SDAG-NEXT: v_cmp_eq_u32_e32 vcc, v5, v7 +; GFX7-SDAG-NEXT: v_mov_b32_e32 v8, 0 +; GFX7-SDAG-NEXT: v_cndmask_b32_e64 v4, 0, 1, vcc +; GFX7-SDAG-NEXT: v_mul_f64 v[0:1], v[0:1], v[8:9] +; GFX7-SDAG-NEXT: v_ldexp_f64 v[2:3], v[2:3], v4 +; GFX7-SDAG-NEXT: s_setpc_b64 s[30:31] +; +; GFX7-GISEL-LABEL: fmul_select_v2f64_test10: +; GFX7-GISEL: ; %bb.0: +; GFX7-GISEL-NEXT: 
s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX7-GISEL-NEXT: v_mov_b32_e32 v9, 0x3fe00000 +; GFX7-GISEL-NEXT: v_mov_b32_e32 v10, 0xbff00000 +; GFX7-GISEL-NEXT: v_cmp_eq_u32_e32 vcc, v4, v6 +; GFX7-GISEL-NEXT: v_cndmask_b32_e32 v9, v10, v9, vcc +; GFX7-GISEL-NEXT: v_cmp_eq_u32_e32 vcc, v5, v7 +; GFX7-GISEL-NEXT: v_mov_b32_e32 v8, 0 +; GFX7-GISEL-NEXT: v_cndmask_b32_e64 v4, 0, 1, vcc +; GFX7-GISEL-NEXT: v_mul_f64 v[0:1], v[0:1], v[8:9] +; GFX7-GISEL-NEXT: v_ldexp_f64 v[2:3], v[2:3], v4 +; GFX7-GISEL-NEXT: s_setpc_b64 s[30:31] +; +; GFX9-SDAG-LABEL: fmul_select_v2f64_test10: +; GFX9-SDAG: ; %bb.0: +; GFX9-SDAG-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX9-SDAG-NEXT: v_mov_b32_e32 v8, 0xbff00000 +; GFX9-SDAG-NEXT: v_mov_b32_e32 v9, 0x3fe00000 +; GFX9-SDAG-NEXT: v_cmp_eq_u32_e32 vcc, v4, v6 +; GFX9-SDAG-NEXT: v_cndmask_b32_e32 v9, v8, v9, vcc +; GFX9-SDAG-NEXT: v_cmp_eq_u32_e32 vcc, v5, v7 +; GFX9-SDAG-NEXT: v_mov_b32_e32 v8, 0 +; GFX9-SDAG-NEXT: v_cndmask_b32_e64 v4, 0, 1, vcc +; GFX9-SDAG-NEXT: v_mul_f64 v[0:1], v[0:1], v[8:9] +; GFX9-SDAG-NEXT: v_ldexp_f64 v[2:3], v[2:3], v4 +; GFX9-SDAG-NEXT: s_setpc_b64 s[30:31] +; +; GFX9-GISEL-LABEL: fmul_select_v2f64_test10: +; GFX9-GISEL: ; %bb.0: +; GFX9-GISEL-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX9-GISEL-NEXT: v_mov_b32_e32 v9, 0x3fe00000 +; GFX9-GISEL-NEXT: v_mov_b32_e32 v10, 0xbff00000 +; GFX9-GISEL-NEXT: v_cmp_eq_u32_e32 vcc, v4, v6 +; GFX9-GISEL-NEXT: v_cndmask_b32_e32 v9, v10, v9, vcc +; GFX9-GISEL-NEXT: v_cmp_eq_u32_e32 vcc, v5, v7 +; GFX9-GISEL-NEXT: v_mov_b32_e32 v8, 0 +; GFX9-GISEL-NEXT: v_cndmask_b32_e64 v4, 0, 1, vcc +; GFX9-GISEL-NEXT: v_mul_f64 v[0:1], v[0:1], v[8:9] +; GFX9-GISEL-NEXT: v_ldexp_f64 v[2:3], v[2:3], v4 +; GFX9-GISEL-NEXT: s_setpc_b64 s[30:31] +; +; GFX10-SDAG-LABEL: fmul_select_v2f64_test10: +; GFX10-SDAG: ; %bb.0: +; GFX10-SDAG-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX10-SDAG-NEXT: v_mov_b32_e32 v8, 0x3fe00000 +; GFX10-SDAG-NEXT: v_cmp_eq_u32_e32 vcc_lo, v4, v6 +; 
GFX10-SDAG-NEXT: v_cndmask_b32_e32 v9, 0xbff00000, v8, vcc_lo +; GFX10-SDAG-NEXT: v_cmp_eq_u32_e32 vcc_lo, v5, v7 +; GFX10-SDAG-NEXT: v_mov_b32_e32 v8, 0 +; GFX10-SDAG-NEXT: v_cndmask_b32_e64 v4, 0, 1, vcc_lo +; GFX10-SDAG-NEXT: v_mul_f64 v[0:1], v[0:1], v[8:9] +; GFX10-SDAG-NEXT: v_ldexp_f64 v[2:3], v[2:3], v4 +; GFX10-SDAG-NEXT: s_setpc_b64 s[30:31] +; +; GFX10-GISEL-LABEL: fmul_select_v2f64_test10: +; GFX10-GISEL: ; %bb.0: +; GFX10-GISEL-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX10-GISEL-NEXT: v_mov_b32_e32 v9, 0xbff00000 +; GFX10-GISEL-NEXT: v_cmp_eq_u32_e32 vcc_lo, v4, v6 +; GFX10-GISEL-NEXT: v_mov_b32_e32 v8, 0 +; GFX10-GISEL-NEXT: v_cndmask_b32_e64 v9, v9, 0x3fe00000, vcc_lo +; GFX10-GISEL-NEXT: v_cmp_eq_u32_e32 vcc_lo, v5, v7 +; GFX10-GISEL-NEXT: v_mul_f64 v[0:1], v[0:1], v[8:9] +; GFX10-GISEL-NEXT: v_cndmask_b32_e64 v4, 0, 1, vcc_lo +; GFX10-GISEL-NEXT: v_ldexp_f64 v[2:3], v[2:3], v4 +; GFX10-GISEL-NEXT: s_setpc_b64 s[30:31] +; +; GFX11-SDAG-LABEL: fmul_select_v2f64_test10: +; GFX11-SDAG: ; %bb.0: +; GFX11-SDAG-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX11-SDAG-NEXT: v_mov_b32_e32 v8, 0x3fe00000 +; GFX11-SDAG-NEXT: v_cmp_eq_u32_e32 vcc_lo, v4, v6 +; GFX11-SDAG-NEXT: s_delay_alu instid0(VALU_DEP_2) | instskip(SKIP_1) | instid1(VALU_DEP_2) +; GFX11-SDAG-NEXT: v_dual_cndmask_b32 v9, 0xbff00000, v8 :: v_dual_mov_b32 v8, 0 +; GFX11-SDAG-NEXT: v_cmp_eq_u32_e32 vcc_lo, v5, v7 +; GFX11-SDAG-NEXT: v_mul_f64 v[0:1], v[0:1], v[8:9] +; GFX11-SDAG-NEXT: v_cndmask_b32_e64 v4, 0, 1, vcc_lo +; GFX11-SDAG-NEXT: s_delay_alu instid0(VALU_DEP_1) +; GFX11-SDAG-NEXT: v_ldexp_f64 v[2:3], v[2:3], v4 +; GFX11-SDAG-NEXT: s_setpc_b64 s[30:31] +; +; GFX11-GISEL-LABEL: fmul_select_v2f64_test10: +; GFX11-GISEL: ; %bb.0: +; GFX11-GISEL-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX11-GISEL-NEXT: v_dual_mov_b32 v9, 0xbff00000 :: v_dual_mov_b32 v8, 0 +; GFX11-GISEL-NEXT: v_cmp_eq_u32_e32 vcc_lo, v4, v6 +; GFX11-GISEL-NEXT: s_delay_alu instid0(VALU_DEP_2) | 
instskip(SKIP_1) | instid1(VALU_DEP_2) +; GFX11-GISEL-NEXT: v_cndmask_b32_e64 v9, v9, 0x3fe00000, vcc_lo +; GFX11-GISEL-NEXT: v_cmp_eq_u32_e32 vcc_lo, v5, v7 +; GFX11-GISEL-NEXT: v_mul_f64 v[0:1], v[0:1], v[8:9] +; GFX11-GISEL-NEXT: v_cndmask_b32_e64 v4, 0, 1, vcc_lo +; GFX11-GISEL-NEXT: s_delay_alu instid0(VALU_DEP_1) +; GFX11-GISEL-NEXT: v_ldexp_f64 v[2:3], v[2:3], v4 +; GFX11-GISEL-NEXT: s_setpc_b64 s[30:31] %bool = icmp eq <2 x i32> %bool.arg1, %bool.arg2 %y = select <2 x i1> %bool, <2 x double> , <2 x double> %ldexp = fmul <2 x double> %x, %y @@ -973,44 +1213,64 @@ define <2 x double> @fmul_select_v2f64_test10(<2 x double> %x, <2 x i32> %bool.a } define double @fmul_select_f64_test11(double %x, i32 %bool.arg1, i32 %bool.arg2) { -; GFX7-LABEL: fmul_select_f64_test11: -; GFX7: ; %bb.0: -; GFX7-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX7-NEXT: v_bfrev_b32_e32 v4, 1 -; GFX7-NEXT: v_cmp_eq_u32_e32 vcc, v2, v3 -; GFX7-NEXT: v_cndmask_b32_e64 v3, v4, -2.0, vcc -; GFX7-NEXT: v_mov_b32_e32 v2, 0 -; GFX7-NEXT: v_mul_f64 v[0:1], v[0:1], v[2:3] -; GFX7-NEXT: s_setpc_b64 s[30:31] -; -; GFX9-LABEL: fmul_select_f64_test11: -; GFX9: ; %bb.0: -; GFX9-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX9-NEXT: v_bfrev_b32_e32 v4, 1 -; GFX9-NEXT: v_cmp_eq_u32_e32 vcc, v2, v3 -; GFX9-NEXT: v_cndmask_b32_e64 v3, v4, -2.0, vcc -; GFX9-NEXT: v_mov_b32_e32 v2, 0 -; GFX9-NEXT: v_mul_f64 v[0:1], v[0:1], v[2:3] -; GFX9-NEXT: s_setpc_b64 s[30:31] -; -; GFX1030-LABEL: fmul_select_f64_test11: -; GFX1030: ; %bb.0: -; GFX1030-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX1030-NEXT: v_cmp_eq_u32_e32 vcc_lo, v2, v3 -; GFX1030-NEXT: v_mov_b32_e32 v4, 0 -; GFX1030-NEXT: v_cndmask_b32_e64 v5, 0x80000000, -2.0, vcc_lo -; GFX1030-NEXT: v_mul_f64 v[0:1], v[0:1], v[4:5] -; GFX1030-NEXT: s_setpc_b64 s[30:31] -; -; GFX1100-LABEL: fmul_select_f64_test11: -; GFX1100: ; %bb.0: -; GFX1100-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX1100-NEXT: v_cmp_eq_u32_e32 vcc_lo, v2, v3 -; 
GFX1100-NEXT: v_mov_b32_e32 v4, 0 -; GFX1100-NEXT: v_cndmask_b32_e64 v5, 0x80000000, -2.0, vcc_lo -; GFX1100-NEXT: s_delay_alu instid0(VALU_DEP_1) -; GFX1100-NEXT: v_mul_f64 v[0:1], v[0:1], v[4:5] -; GFX1100-NEXT: s_setpc_b64 s[30:31] +; GFX7-SDAG-LABEL: fmul_select_f64_test11: +; GFX7-SDAG: ; %bb.0: +; GFX7-SDAG-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX7-SDAG-NEXT: v_bfrev_b32_e32 v4, 1 +; GFX7-SDAG-NEXT: v_cmp_eq_u32_e32 vcc, v2, v3 +; GFX7-SDAG-NEXT: v_cndmask_b32_e64 v3, v4, -2.0, vcc +; GFX7-SDAG-NEXT: v_mov_b32_e32 v2, 0 +; GFX7-SDAG-NEXT: v_mul_f64 v[0:1], v[0:1], v[2:3] +; GFX7-SDAG-NEXT: s_setpc_b64 s[30:31] +; +; GFX7-GISEL-LABEL: fmul_select_f64_test11: +; GFX7-GISEL: ; %bb.0: +; GFX7-GISEL-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX7-GISEL-NEXT: v_bfrev_b32_e32 v5, 1 +; GFX7-GISEL-NEXT: v_cmp_eq_u32_e32 vcc, v2, v3 +; GFX7-GISEL-NEXT: v_mov_b32_e32 v4, 0 +; GFX7-GISEL-NEXT: v_cndmask_b32_e64 v5, v5, -2.0, vcc +; GFX7-GISEL-NEXT: v_mul_f64 v[0:1], v[0:1], v[4:5] +; GFX7-GISEL-NEXT: s_setpc_b64 s[30:31] +; +; GFX9-SDAG-LABEL: fmul_select_f64_test11: +; GFX9-SDAG: ; %bb.0: +; GFX9-SDAG-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX9-SDAG-NEXT: v_bfrev_b32_e32 v4, 1 +; GFX9-SDAG-NEXT: v_cmp_eq_u32_e32 vcc, v2, v3 +; GFX9-SDAG-NEXT: v_cndmask_b32_e64 v3, v4, -2.0, vcc +; GFX9-SDAG-NEXT: v_mov_b32_e32 v2, 0 +; GFX9-SDAG-NEXT: v_mul_f64 v[0:1], v[0:1], v[2:3] +; GFX9-SDAG-NEXT: s_setpc_b64 s[30:31] +; +; GFX9-GISEL-LABEL: fmul_select_f64_test11: +; GFX9-GISEL: ; %bb.0: +; GFX9-GISEL-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX9-GISEL-NEXT: v_bfrev_b32_e32 v5, 1 +; GFX9-GISEL-NEXT: v_cmp_eq_u32_e32 vcc, v2, v3 +; GFX9-GISEL-NEXT: v_mov_b32_e32 v4, 0 +; GFX9-GISEL-NEXT: v_cndmask_b32_e64 v5, v5, -2.0, vcc +; GFX9-GISEL-NEXT: v_mul_f64 v[0:1], v[0:1], v[4:5] +; GFX9-GISEL-NEXT: s_setpc_b64 s[30:31] +; +; GFX10-LABEL: fmul_select_f64_test11: +; GFX10: ; %bb.0: +; GFX10-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX10-NEXT: 
v_cmp_eq_u32_e32 vcc_lo, v2, v3 +; GFX10-NEXT: v_mov_b32_e32 v4, 0 +; GFX10-NEXT: v_cndmask_b32_e64 v5, 0x80000000, -2.0, vcc_lo +; GFX10-NEXT: v_mul_f64 v[0:1], v[0:1], v[4:5] +; GFX10-NEXT: s_setpc_b64 s[30:31] +; +; GFX11-LABEL: fmul_select_f64_test11: +; GFX11: ; %bb.0: +; GFX11-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX11-NEXT: v_cmp_eq_u32_e32 vcc_lo, v2, v3 +; GFX11-NEXT: v_mov_b32_e32 v4, 0 +; GFX11-NEXT: v_cndmask_b32_e64 v5, 0x80000000, -2.0, vcc_lo +; GFX11-NEXT: s_delay_alu instid0(VALU_DEP_1) +; GFX11-NEXT: v_mul_f64 v[0:1], v[0:1], v[4:5] +; GFX11-NEXT: s_setpc_b64 s[30:31] %bool = icmp eq i32 %bool.arg1, %bool.arg2 %y = select i1 %bool, double -2.000000e+00, double -0.000000e+00 %ldexp = fmul double %x, %y @@ -1018,45 +1278,84 @@ define double @fmul_select_f64_test11(double %x, i32 %bool.arg1, i32 %bool.arg2) } define double @fmul_select_f64_test12(double %x, i32 %bool.arg1, i32 %bool.arg2) { -; GFX7-LABEL: fmul_select_f64_test12: -; GFX7: ; %bb.0: -; GFX7-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX7-NEXT: v_cmp_ne_u32_e32 vcc, v2, v3 -; GFX7-NEXT: v_cndmask_b32_e64 v2, 0, 1, vcc -; GFX7-NEXT: v_lshlrev_b32_e32 v3, 31, v2 -; GFX7-NEXT: v_mov_b32_e32 v2, 0 -; GFX7-NEXT: v_mul_f64 v[0:1], v[0:1], v[2:3] -; GFX7-NEXT: s_setpc_b64 s[30:31] -; -; GFX9-LABEL: fmul_select_f64_test12: -; GFX9: ; %bb.0: -; GFX9-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX9-NEXT: v_cmp_ne_u32_e32 vcc, v2, v3 -; GFX9-NEXT: v_cndmask_b32_e64 v2, 0, 1, vcc -; GFX9-NEXT: v_lshlrev_b32_e32 v3, 31, v2 -; GFX9-NEXT: v_mov_b32_e32 v2, 0 -; GFX9-NEXT: v_mul_f64 v[0:1], v[0:1], v[2:3] -; GFX9-NEXT: s_setpc_b64 s[30:31] -; -; GFX1030-LABEL: fmul_select_f64_test12: -; GFX1030: ; %bb.0: -; GFX1030-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX1030-NEXT: v_cmp_ne_u32_e32 vcc_lo, v2, v3 -; GFX1030-NEXT: v_mov_b32_e32 v2, 0 -; GFX1030-NEXT: v_cndmask_b32_e64 v3, 0, 1, vcc_lo -; GFX1030-NEXT: v_lshlrev_b32_e32 v3, 31, v3 -; GFX1030-NEXT: v_mul_f64 v[0:1], 
v[0:1], v[2:3] -; GFX1030-NEXT: s_setpc_b64 s[30:31] -; -; GFX1100-LABEL: fmul_select_f64_test12: -; GFX1100: ; %bb.0: -; GFX1100-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX1100-NEXT: v_cmp_ne_u32_e32 vcc_lo, v2, v3 -; GFX1100-NEXT: v_cndmask_b32_e64 v3, 0, 1, vcc_lo -; GFX1100-NEXT: s_delay_alu instid0(VALU_DEP_1) | instskip(NEXT) | instid1(VALU_DEP_1) -; GFX1100-NEXT: v_dual_mov_b32 v2, 0 :: v_dual_lshlrev_b32 v3, 31, v3 -; GFX1100-NEXT: v_mul_f64 v[0:1], v[0:1], v[2:3] -; GFX1100-NEXT: s_setpc_b64 s[30:31] +; GFX7-SDAG-LABEL: fmul_select_f64_test12: +; GFX7-SDAG: ; %bb.0: +; GFX7-SDAG-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX7-SDAG-NEXT: v_cmp_ne_u32_e32 vcc, v2, v3 +; GFX7-SDAG-NEXT: v_cndmask_b32_e64 v2, 0, 1, vcc +; GFX7-SDAG-NEXT: v_lshlrev_b32_e32 v3, 31, v2 +; GFX7-SDAG-NEXT: v_mov_b32_e32 v2, 0 +; GFX7-SDAG-NEXT: v_mul_f64 v[0:1], v[0:1], v[2:3] +; GFX7-SDAG-NEXT: s_setpc_b64 s[30:31] +; +; GFX7-GISEL-LABEL: fmul_select_f64_test12: +; GFX7-GISEL: ; %bb.0: +; GFX7-GISEL-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX7-GISEL-NEXT: v_bfrev_b32_e32 v5, 1 +; GFX7-GISEL-NEXT: v_cmp_eq_u32_e32 vcc, v2, v3 +; GFX7-GISEL-NEXT: v_mov_b32_e32 v4, 0 +; GFX7-GISEL-NEXT: v_cndmask_b32_e64 v5, v5, 0, vcc +; GFX7-GISEL-NEXT: v_mul_f64 v[0:1], v[0:1], v[4:5] +; GFX7-GISEL-NEXT: s_setpc_b64 s[30:31] +; +; GFX9-SDAG-LABEL: fmul_select_f64_test12: +; GFX9-SDAG: ; %bb.0: +; GFX9-SDAG-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX9-SDAG-NEXT: v_cmp_ne_u32_e32 vcc, v2, v3 +; GFX9-SDAG-NEXT: v_cndmask_b32_e64 v2, 0, 1, vcc +; GFX9-SDAG-NEXT: v_lshlrev_b32_e32 v3, 31, v2 +; GFX9-SDAG-NEXT: v_mov_b32_e32 v2, 0 +; GFX9-SDAG-NEXT: v_mul_f64 v[0:1], v[0:1], v[2:3] +; GFX9-SDAG-NEXT: s_setpc_b64 s[30:31] +; +; GFX9-GISEL-LABEL: fmul_select_f64_test12: +; GFX9-GISEL: ; %bb.0: +; GFX9-GISEL-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX9-GISEL-NEXT: v_bfrev_b32_e32 v5, 1 +; GFX9-GISEL-NEXT: v_cmp_eq_u32_e32 vcc, v2, v3 +; GFX9-GISEL-NEXT: 
v_mov_b32_e32 v4, 0 +; GFX9-GISEL-NEXT: v_cndmask_b32_e64 v5, v5, 0, vcc +; GFX9-GISEL-NEXT: v_mul_f64 v[0:1], v[0:1], v[4:5] +; GFX9-GISEL-NEXT: s_setpc_b64 s[30:31] +; +; GFX10-SDAG-LABEL: fmul_select_f64_test12: +; GFX10-SDAG: ; %bb.0: +; GFX10-SDAG-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX10-SDAG-NEXT: v_cmp_ne_u32_e32 vcc_lo, v2, v3 +; GFX10-SDAG-NEXT: v_mov_b32_e32 v2, 0 +; GFX10-SDAG-NEXT: v_cndmask_b32_e64 v3, 0, 1, vcc_lo +; GFX10-SDAG-NEXT: v_lshlrev_b32_e32 v3, 31, v3 +; GFX10-SDAG-NEXT: v_mul_f64 v[0:1], v[0:1], v[2:3] +; GFX10-SDAG-NEXT: s_setpc_b64 s[30:31] +; +; GFX10-GISEL-LABEL: fmul_select_f64_test12: +; GFX10-GISEL: ; %bb.0: +; GFX10-GISEL-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX10-GISEL-NEXT: v_cmp_eq_u32_e32 vcc_lo, v2, v3 +; GFX10-GISEL-NEXT: v_mov_b32_e32 v4, 0 +; GFX10-GISEL-NEXT: v_cndmask_b32_e64 v5, 0x80000000, 0, vcc_lo +; GFX10-GISEL-NEXT: v_mul_f64 v[0:1], v[0:1], v[4:5] +; GFX10-GISEL-NEXT: s_setpc_b64 s[30:31] +; +; GFX11-SDAG-LABEL: fmul_select_f64_test12: +; GFX11-SDAG: ; %bb.0: +; GFX11-SDAG-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX11-SDAG-NEXT: v_cmp_ne_u32_e32 vcc_lo, v2, v3 +; GFX11-SDAG-NEXT: v_cndmask_b32_e64 v3, 0, 1, vcc_lo +; GFX11-SDAG-NEXT: s_delay_alu instid0(VALU_DEP_1) | instskip(NEXT) | instid1(VALU_DEP_1) +; GFX11-SDAG-NEXT: v_dual_mov_b32 v2, 0 :: v_dual_lshlrev_b32 v3, 31, v3 +; GFX11-SDAG-NEXT: v_mul_f64 v[0:1], v[0:1], v[2:3] +; GFX11-SDAG-NEXT: s_setpc_b64 s[30:31] +; +; GFX11-GISEL-LABEL: fmul_select_f64_test12: +; GFX11-GISEL: ; %bb.0: +; GFX11-GISEL-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX11-GISEL-NEXT: v_cmp_eq_u32_e32 vcc_lo, v2, v3 +; GFX11-GISEL-NEXT: v_mov_b32_e32 v4, 0 +; GFX11-GISEL-NEXT: v_cndmask_b32_e64 v5, 0x80000000, 0, vcc_lo +; GFX11-GISEL-NEXT: s_delay_alu instid0(VALU_DEP_1) +; GFX11-GISEL-NEXT: v_mul_f64 v[0:1], v[0:1], v[4:5] +; GFX11-GISEL-NEXT: s_setpc_b64 s[30:31] %bool = icmp eq i32 %bool.arg1, %bool.arg2 %y = select i1 %bool, double 
0.000000e+00, double -0.000000e+00 %ldexp = fmul double %x, %y @@ -1084,24 +1383,24 @@ define double @fmul_select_f64_test13(double %x, i32 %bool.arg1, i32 %bool.arg2) ; GFX9-NEXT: v_mul_f64 v[0:1], v[0:1], v[4:5] ; GFX9-NEXT: s_setpc_b64 s[30:31] ; -; GFX1030-LABEL: fmul_select_f64_test13: -; GFX1030: ; %bb.0: -; GFX1030-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX1030-NEXT: v_cmp_eq_u32_e32 vcc_lo, v2, v3 -; GFX1030-NEXT: v_mov_b32_e32 v4, 0 -; GFX1030-NEXT: v_cndmask_b32_e64 v5, 0x40300000, 0, vcc_lo -; GFX1030-NEXT: v_mul_f64 v[0:1], v[0:1], v[4:5] -; GFX1030-NEXT: s_setpc_b64 s[30:31] -; -; GFX1100-LABEL: fmul_select_f64_test13: -; GFX1100: ; %bb.0: -; GFX1100-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX1100-NEXT: v_cmp_eq_u32_e32 vcc_lo, v2, v3 -; GFX1100-NEXT: v_mov_b32_e32 v4, 0 -; GFX1100-NEXT: v_cndmask_b32_e64 v5, 0x40300000, 0, vcc_lo -; GFX1100-NEXT: s_delay_alu instid0(VALU_DEP_1) -; GFX1100-NEXT: v_mul_f64 v[0:1], v[0:1], v[4:5] -; GFX1100-NEXT: s_setpc_b64 s[30:31] +; GFX10-LABEL: fmul_select_f64_test13: +; GFX10: ; %bb.0: +; GFX10-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX10-NEXT: v_cmp_eq_u32_e32 vcc_lo, v2, v3 +; GFX10-NEXT: v_mov_b32_e32 v4, 0 +; GFX10-NEXT: v_cndmask_b32_e64 v5, 0x40300000, 0, vcc_lo +; GFX10-NEXT: v_mul_f64 v[0:1], v[0:1], v[4:5] +; GFX10-NEXT: s_setpc_b64 s[30:31] +; +; GFX11-LABEL: fmul_select_f64_test13: +; GFX11: ; %bb.0: +; GFX11-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX11-NEXT: v_cmp_eq_u32_e32 vcc_lo, v2, v3 +; GFX11-NEXT: v_mov_b32_e32 v4, 0 +; GFX11-NEXT: v_cndmask_b32_e64 v5, 0x40300000, 0, vcc_lo +; GFX11-NEXT: s_delay_alu instid0(VALU_DEP_1) +; GFX11-NEXT: v_mul_f64 v[0:1], v[0:1], v[4:5] +; GFX11-NEXT: s_setpc_b64 s[30:31] %bool = icmp eq i32 %bool.arg1, %bool.arg2 %y = select i1 %bool, double 0.000000e+00, double 1.600000e+01 %ldexp = fmul double %x, %y @@ -1109,44 +1408,83 @@ define double @fmul_select_f64_test13(double %x, i32 %bool.arg1, i32 %bool.arg2) } define double 
@fmul_select_f64_test14_sel_log2val_pos92_neg27(double %x, i32 %bool.arg1, i32 %bool.arg2) { -; GFX7-LABEL: fmul_select_f64_test14_sel_log2val_pos92_neg27: -; GFX7: ; %bb.0: -; GFX7-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX7-NEXT: v_not_b32_e32 v4, 26 -; GFX7-NEXT: v_mov_b32_e32 v5, 0x5c -; GFX7-NEXT: v_cmp_eq_u32_e32 vcc, v2, v3 -; GFX7-NEXT: v_cndmask_b32_e32 v2, v4, v5, vcc -; GFX7-NEXT: v_ldexp_f64 v[0:1], v[0:1], v2 -; GFX7-NEXT: s_setpc_b64 s[30:31] -; -; GFX9-LABEL: fmul_select_f64_test14_sel_log2val_pos92_neg27: -; GFX9: ; %bb.0: -; GFX9-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX9-NEXT: v_not_b32_e32 v4, 26 -; GFX9-NEXT: v_mov_b32_e32 v5, 0x5c -; GFX9-NEXT: v_cmp_eq_u32_e32 vcc, v2, v3 -; GFX9-NEXT: v_cndmask_b32_e32 v2, v4, v5, vcc -; GFX9-NEXT: v_ldexp_f64 v[0:1], v[0:1], v2 -; GFX9-NEXT: s_setpc_b64 s[30:31] -; -; GFX1030-LABEL: fmul_select_f64_test14_sel_log2val_pos92_neg27: -; GFX1030: ; %bb.0: -; GFX1030-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX1030-NEXT: v_mov_b32_e32 v4, 0x5c -; GFX1030-NEXT: v_cmp_eq_u32_e32 vcc_lo, v2, v3 -; GFX1030-NEXT: v_cndmask_b32_e32 v2, 0xffffffe5, v4, vcc_lo -; GFX1030-NEXT: v_ldexp_f64 v[0:1], v[0:1], v2 -; GFX1030-NEXT: s_setpc_b64 s[30:31] -; -; GFX1100-LABEL: fmul_select_f64_test14_sel_log2val_pos92_neg27: -; GFX1100: ; %bb.0: -; GFX1100-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX1100-NEXT: v_mov_b32_e32 v4, 0x5c -; GFX1100-NEXT: v_cmp_eq_u32_e32 vcc_lo, v2, v3 -; GFX1100-NEXT: s_delay_alu instid0(VALU_DEP_2) | instskip(NEXT) | instid1(VALU_DEP_1) -; GFX1100-NEXT: v_cndmask_b32_e32 v2, 0xffffffe5, v4, vcc_lo -; GFX1100-NEXT: v_ldexp_f64 v[0:1], v[0:1], v2 -; GFX1100-NEXT: s_setpc_b64 s[30:31] +; GFX7-SDAG-LABEL: fmul_select_f64_test14_sel_log2val_pos92_neg27: +; GFX7-SDAG: ; %bb.0: +; GFX7-SDAG-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX7-SDAG-NEXT: v_not_b32_e32 v4, 26 +; GFX7-SDAG-NEXT: v_mov_b32_e32 v5, 0x5c +; GFX7-SDAG-NEXT: v_cmp_eq_u32_e32 vcc, v2, v3 +; 
GFX7-SDAG-NEXT: v_cndmask_b32_e32 v2, v4, v5, vcc +; GFX7-SDAG-NEXT: v_ldexp_f64 v[0:1], v[0:1], v2 +; GFX7-SDAG-NEXT: s_setpc_b64 s[30:31] +; +; GFX7-GISEL-LABEL: fmul_select_f64_test14_sel_log2val_pos92_neg27: +; GFX7-GISEL: ; %bb.0: +; GFX7-GISEL-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX7-GISEL-NEXT: v_mov_b32_e32 v4, 0x5c +; GFX7-GISEL-NEXT: v_not_b32_e32 v5, 26 +; GFX7-GISEL-NEXT: v_cmp_eq_u32_e32 vcc, v2, v3 +; GFX7-GISEL-NEXT: v_cndmask_b32_e32 v2, v5, v4, vcc +; GFX7-GISEL-NEXT: v_ldexp_f64 v[0:1], v[0:1], v2 +; GFX7-GISEL-NEXT: s_setpc_b64 s[30:31] +; +; GFX9-SDAG-LABEL: fmul_select_f64_test14_sel_log2val_pos92_neg27: +; GFX9-SDAG: ; %bb.0: +; GFX9-SDAG-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX9-SDAG-NEXT: v_not_b32_e32 v4, 26 +; GFX9-SDAG-NEXT: v_mov_b32_e32 v5, 0x5c +; GFX9-SDAG-NEXT: v_cmp_eq_u32_e32 vcc, v2, v3 +; GFX9-SDAG-NEXT: v_cndmask_b32_e32 v2, v4, v5, vcc +; GFX9-SDAG-NEXT: v_ldexp_f64 v[0:1], v[0:1], v2 +; GFX9-SDAG-NEXT: s_setpc_b64 s[30:31] +; +; GFX9-GISEL-LABEL: fmul_select_f64_test14_sel_log2val_pos92_neg27: +; GFX9-GISEL: ; %bb.0: +; GFX9-GISEL-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX9-GISEL-NEXT: v_mov_b32_e32 v4, 0x5c +; GFX9-GISEL-NEXT: v_not_b32_e32 v5, 26 +; GFX9-GISEL-NEXT: v_cmp_eq_u32_e32 vcc, v2, v3 +; GFX9-GISEL-NEXT: v_cndmask_b32_e32 v2, v5, v4, vcc +; GFX9-GISEL-NEXT: v_ldexp_f64 v[0:1], v[0:1], v2 +; GFX9-GISEL-NEXT: s_setpc_b64 s[30:31] +; +; GFX10-SDAG-LABEL: fmul_select_f64_test14_sel_log2val_pos92_neg27: +; GFX10-SDAG: ; %bb.0: +; GFX10-SDAG-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX10-SDAG-NEXT: v_mov_b32_e32 v4, 0x5c +; GFX10-SDAG-NEXT: v_cmp_eq_u32_e32 vcc_lo, v2, v3 +; GFX10-SDAG-NEXT: v_cndmask_b32_e32 v2, 0xffffffe5, v4, vcc_lo +; GFX10-SDAG-NEXT: v_ldexp_f64 v[0:1], v[0:1], v2 +; GFX10-SDAG-NEXT: s_setpc_b64 s[30:31] +; +; GFX10-GISEL-LABEL: fmul_select_f64_test14_sel_log2val_pos92_neg27: +; GFX10-GISEL: ; %bb.0: +; GFX10-GISEL-NEXT: s_waitcnt vmcnt(0) expcnt(0) 
lgkmcnt(0) +; GFX10-GISEL-NEXT: v_not_b32_e32 v4, 26 +; GFX10-GISEL-NEXT: v_cmp_eq_u32_e32 vcc_lo, v2, v3 +; GFX10-GISEL-NEXT: v_cndmask_b32_e64 v2, v4, 0x5c, vcc_lo +; GFX10-GISEL-NEXT: v_ldexp_f64 v[0:1], v[0:1], v2 +; GFX10-GISEL-NEXT: s_setpc_b64 s[30:31] +; +; GFX11-SDAG-LABEL: fmul_select_f64_test14_sel_log2val_pos92_neg27: +; GFX11-SDAG: ; %bb.0: +; GFX11-SDAG-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX11-SDAG-NEXT: v_mov_b32_e32 v4, 0x5c +; GFX11-SDAG-NEXT: v_cmp_eq_u32_e32 vcc_lo, v2, v3 +; GFX11-SDAG-NEXT: s_delay_alu instid0(VALU_DEP_2) | instskip(NEXT) | instid1(VALU_DEP_1) +; GFX11-SDAG-NEXT: v_cndmask_b32_e32 v2, 0xffffffe5, v4, vcc_lo +; GFX11-SDAG-NEXT: v_ldexp_f64 v[0:1], v[0:1], v2 +; GFX11-SDAG-NEXT: s_setpc_b64 s[30:31] +; +; GFX11-GISEL-LABEL: fmul_select_f64_test14_sel_log2val_pos92_neg27: +; GFX11-GISEL: ; %bb.0: +; GFX11-GISEL-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX11-GISEL-NEXT: v_not_b32_e32 v4, 26 +; GFX11-GISEL-NEXT: v_cmp_eq_u32_e32 vcc_lo, v2, v3 +; GFX11-GISEL-NEXT: s_delay_alu instid0(VALU_DEP_2) | instskip(NEXT) | instid1(VALU_DEP_1) +; GFX11-GISEL-NEXT: v_cndmask_b32_e64 v2, v4, 0x5c, vcc_lo +; GFX11-GISEL-NEXT: v_ldexp_f64 v[0:1], v[0:1], v2 +; GFX11-GISEL-NEXT: s_setpc_b64 s[30:31] %bool = icmp eq i32 %bool.arg1, %bool.arg2 %y = select i1 %bool, double 0x45B0000000000000, double 0x3E40000000000000 %ldexp = fmul double %x, %y @@ -1154,44 +1492,83 @@ define double @fmul_select_f64_test14_sel_log2val_pos92_neg27(double %x, i32 %bo } define double @fmul_select_f64_test15_sel_log2val_neg42_neg33(double %x, i32 %bool.arg1, i32 %bool.arg2) { -; GFX7-LABEL: fmul_select_f64_test15_sel_log2val_neg42_neg33: -; GFX7: ; %bb.0: -; GFX7-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX7-NEXT: v_not_b32_e32 v4, 32 -; GFX7-NEXT: v_not_b32_e32 v5, 41 -; GFX7-NEXT: v_cmp_eq_u32_e32 vcc, v2, v3 -; GFX7-NEXT: v_cndmask_b32_e32 v2, v4, v5, vcc -; GFX7-NEXT: v_ldexp_f64 v[0:1], v[0:1], v2 -; GFX7-NEXT: s_setpc_b64 s[30:31] -; 
-; GFX9-LABEL: fmul_select_f64_test15_sel_log2val_neg42_neg33: -; GFX9: ; %bb.0: -; GFX9-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX9-NEXT: v_not_b32_e32 v4, 32 -; GFX9-NEXT: v_not_b32_e32 v5, 41 -; GFX9-NEXT: v_cmp_eq_u32_e32 vcc, v2, v3 -; GFX9-NEXT: v_cndmask_b32_e32 v2, v4, v5, vcc -; GFX9-NEXT: v_ldexp_f64 v[0:1], v[0:1], v2 -; GFX9-NEXT: s_setpc_b64 s[30:31] -; -; GFX1030-LABEL: fmul_select_f64_test15_sel_log2val_neg42_neg33: -; GFX1030: ; %bb.0: -; GFX1030-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX1030-NEXT: v_not_b32_e32 v4, 41 -; GFX1030-NEXT: v_cmp_eq_u32_e32 vcc_lo, v2, v3 -; GFX1030-NEXT: v_cndmask_b32_e32 v2, 0xffffffdf, v4, vcc_lo -; GFX1030-NEXT: v_ldexp_f64 v[0:1], v[0:1], v2 -; GFX1030-NEXT: s_setpc_b64 s[30:31] -; -; GFX1100-LABEL: fmul_select_f64_test15_sel_log2val_neg42_neg33: -; GFX1100: ; %bb.0: -; GFX1100-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX1100-NEXT: v_not_b32_e32 v4, 41 -; GFX1100-NEXT: v_cmp_eq_u32_e32 vcc_lo, v2, v3 -; GFX1100-NEXT: s_delay_alu instid0(VALU_DEP_2) | instskip(NEXT) | instid1(VALU_DEP_1) -; GFX1100-NEXT: v_cndmask_b32_e32 v2, 0xffffffdf, v4, vcc_lo -; GFX1100-NEXT: v_ldexp_f64 v[0:1], v[0:1], v2 -; GFX1100-NEXT: s_setpc_b64 s[30:31] +; GFX7-SDAG-LABEL: fmul_select_f64_test15_sel_log2val_neg42_neg33: +; GFX7-SDAG: ; %bb.0: +; GFX7-SDAG-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX7-SDAG-NEXT: v_not_b32_e32 v4, 32 +; GFX7-SDAG-NEXT: v_not_b32_e32 v5, 41 +; GFX7-SDAG-NEXT: v_cmp_eq_u32_e32 vcc, v2, v3 +; GFX7-SDAG-NEXT: v_cndmask_b32_e32 v2, v4, v5, vcc +; GFX7-SDAG-NEXT: v_ldexp_f64 v[0:1], v[0:1], v2 +; GFX7-SDAG-NEXT: s_setpc_b64 s[30:31] +; +; GFX7-GISEL-LABEL: fmul_select_f64_test15_sel_log2val_neg42_neg33: +; GFX7-GISEL: ; %bb.0: +; GFX7-GISEL-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX7-GISEL-NEXT: v_not_b32_e32 v4, 41 +; GFX7-GISEL-NEXT: v_not_b32_e32 v5, 32 +; GFX7-GISEL-NEXT: v_cmp_eq_u32_e32 vcc, v2, v3 +; GFX7-GISEL-NEXT: v_cndmask_b32_e32 v2, v5, v4, vcc +; 
GFX7-GISEL-NEXT: v_ldexp_f64 v[0:1], v[0:1], v2 +; GFX7-GISEL-NEXT: s_setpc_b64 s[30:31] +; +; GFX9-SDAG-LABEL: fmul_select_f64_test15_sel_log2val_neg42_neg33: +; GFX9-SDAG: ; %bb.0: +; GFX9-SDAG-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX9-SDAG-NEXT: v_not_b32_e32 v4, 32 +; GFX9-SDAG-NEXT: v_not_b32_e32 v5, 41 +; GFX9-SDAG-NEXT: v_cmp_eq_u32_e32 vcc, v2, v3 +; GFX9-SDAG-NEXT: v_cndmask_b32_e32 v2, v4, v5, vcc +; GFX9-SDAG-NEXT: v_ldexp_f64 v[0:1], v[0:1], v2 +; GFX9-SDAG-NEXT: s_setpc_b64 s[30:31] +; +; GFX9-GISEL-LABEL: fmul_select_f64_test15_sel_log2val_neg42_neg33: +; GFX9-GISEL: ; %bb.0: +; GFX9-GISEL-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX9-GISEL-NEXT: v_not_b32_e32 v4, 41 +; GFX9-GISEL-NEXT: v_not_b32_e32 v5, 32 +; GFX9-GISEL-NEXT: v_cmp_eq_u32_e32 vcc, v2, v3 +; GFX9-GISEL-NEXT: v_cndmask_b32_e32 v2, v5, v4, vcc +; GFX9-GISEL-NEXT: v_ldexp_f64 v[0:1], v[0:1], v2 +; GFX9-GISEL-NEXT: s_setpc_b64 s[30:31] +; +; GFX10-SDAG-LABEL: fmul_select_f64_test15_sel_log2val_neg42_neg33: +; GFX10-SDAG: ; %bb.0: +; GFX10-SDAG-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX10-SDAG-NEXT: v_not_b32_e32 v4, 41 +; GFX10-SDAG-NEXT: v_cmp_eq_u32_e32 vcc_lo, v2, v3 +; GFX10-SDAG-NEXT: v_cndmask_b32_e32 v2, 0xffffffdf, v4, vcc_lo +; GFX10-SDAG-NEXT: v_ldexp_f64 v[0:1], v[0:1], v2 +; GFX10-SDAG-NEXT: s_setpc_b64 s[30:31] +; +; GFX10-GISEL-LABEL: fmul_select_f64_test15_sel_log2val_neg42_neg33: +; GFX10-GISEL: ; %bb.0: +; GFX10-GISEL-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX10-GISEL-NEXT: v_not_b32_e32 v4, 32 +; GFX10-GISEL-NEXT: v_cmp_eq_u32_e32 vcc_lo, v2, v3 +; GFX10-GISEL-NEXT: v_cndmask_b32_e64 v2, v4, 0xffffffd6, vcc_lo +; GFX10-GISEL-NEXT: v_ldexp_f64 v[0:1], v[0:1], v2 +; GFX10-GISEL-NEXT: s_setpc_b64 s[30:31] +; +; GFX11-SDAG-LABEL: fmul_select_f64_test15_sel_log2val_neg42_neg33: +; GFX11-SDAG: ; %bb.0: +; GFX11-SDAG-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX11-SDAG-NEXT: v_not_b32_e32 v4, 41 +; GFX11-SDAG-NEXT: v_cmp_eq_u32_e32 
vcc_lo, v2, v3 +; GFX11-SDAG-NEXT: s_delay_alu instid0(VALU_DEP_2) | instskip(NEXT) | instid1(VALU_DEP_1) +; GFX11-SDAG-NEXT: v_cndmask_b32_e32 v2, 0xffffffdf, v4, vcc_lo +; GFX11-SDAG-NEXT: v_ldexp_f64 v[0:1], v[0:1], v2 +; GFX11-SDAG-NEXT: s_setpc_b64 s[30:31] +; +; GFX11-GISEL-LABEL: fmul_select_f64_test15_sel_log2val_neg42_neg33: +; GFX11-GISEL: ; %bb.0: +; GFX11-GISEL-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX11-GISEL-NEXT: v_not_b32_e32 v4, 32 +; GFX11-GISEL-NEXT: v_cmp_eq_u32_e32 vcc_lo, v2, v3 +; GFX11-GISEL-NEXT: s_delay_alu instid0(VALU_DEP_2) | instskip(NEXT) | instid1(VALU_DEP_1) +; GFX11-GISEL-NEXT: v_cndmask_b32_e64 v2, v4, 0xffffffd6, vcc_lo +; GFX11-GISEL-NEXT: v_ldexp_f64 v[0:1], v[0:1], v2 +; GFX11-GISEL-NEXT: s_setpc_b64 s[30:31] %bool = icmp eq i32 %bool.arg1, %bool.arg2 %y = select i1 %bool, double 0x3D50000000000000, double 0x3DE0000000000000 %ldexp = fmul double %x, %y @@ -1200,40 +1577,82 @@ define double @fmul_select_f64_test15_sel_log2val_neg42_neg33(double %x, i32 %bo define half @fmul_select_f16_test1(half %x, i32 %bool.arg1, i32 %bool.arg2) { -; GFX7-LABEL: fmul_select_f16_test1: -; GFX7: ; %bb.0: -; GFX7-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX7-NEXT: v_cvt_f16_f32_e32 v0, v0 -; GFX7-NEXT: v_cmp_eq_u32_e32 vcc, v1, v2 -; GFX7-NEXT: v_cndmask_b32_e64 v1, 0, 1, vcc -; GFX7-NEXT: v_cvt_f32_f16_e32 v0, v0 -; GFX7-NEXT: v_ldexp_f32_e32 v0, v0, v1 -; GFX7-NEXT: s_setpc_b64 s[30:31] -; -; GFX9-LABEL: fmul_select_f16_test1: -; GFX9: ; %bb.0: -; GFX9-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX9-NEXT: v_cmp_eq_u32_e32 vcc, v1, v2 -; GFX9-NEXT: v_cndmask_b32_e64 v1, 0, 1, vcc -; GFX9-NEXT: v_ldexp_f16_e32 v0, v0, v1 -; GFX9-NEXT: s_setpc_b64 s[30:31] -; -; GFX1030-LABEL: fmul_select_f16_test1: -; GFX1030: ; %bb.0: -; GFX1030-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX1030-NEXT: v_cmp_eq_u32_e32 vcc_lo, v1, v2 -; GFX1030-NEXT: v_cndmask_b32_e64 v1, 0, 1, vcc_lo -; GFX1030-NEXT: v_ldexp_f16_e32 v0, v0, v1 -; 
GFX1030-NEXT: s_setpc_b64 s[30:31] -; -; GFX1100-LABEL: fmul_select_f16_test1: -; GFX1100: ; %bb.0: -; GFX1100-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX1100-NEXT: v_cmp_eq_u32_e32 vcc_lo, v1, v2 -; GFX1100-NEXT: v_cndmask_b32_e64 v1, 0, 1, vcc_lo -; GFX1100-NEXT: s_delay_alu instid0(VALU_DEP_1) -; GFX1100-NEXT: v_ldexp_f16_e32 v0, v0, v1 -; GFX1100-NEXT: s_setpc_b64 s[30:31] +; GFX7-SDAG-LABEL: fmul_select_f16_test1: +; GFX7-SDAG: ; %bb.0: +; GFX7-SDAG-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX7-SDAG-NEXT: v_cvt_f16_f32_e32 v0, v0 +; GFX7-SDAG-NEXT: v_cmp_eq_u32_e32 vcc, v1, v2 +; GFX7-SDAG-NEXT: v_cndmask_b32_e64 v1, 0, 1, vcc +; GFX7-SDAG-NEXT: v_cvt_f32_f16_e32 v0, v0 +; GFX7-SDAG-NEXT: v_ldexp_f32_e32 v0, v0, v1 +; GFX7-SDAG-NEXT: s_setpc_b64 s[30:31] +; +; GFX7-GISEL-LABEL: fmul_select_f16_test1: +; GFX7-GISEL: ; %bb.0: +; GFX7-GISEL-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX7-GISEL-NEXT: v_cvt_f32_f16_e32 v0, v0 +; GFX7-GISEL-NEXT: v_cmp_eq_u32_e32 vcc, v1, v2 +; GFX7-GISEL-NEXT: v_cndmask_b32_e64 v1, 0, 1, vcc +; GFX7-GISEL-NEXT: v_ldexp_f32_e32 v0, v0, v1 +; GFX7-GISEL-NEXT: v_cvt_f16_f32_e32 v0, v0 +; GFX7-GISEL-NEXT: s_setpc_b64 s[30:31] +; +; GFX9-SDAG-LABEL: fmul_select_f16_test1: +; GFX9-SDAG: ; %bb.0: +; GFX9-SDAG-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX9-SDAG-NEXT: v_cmp_eq_u32_e32 vcc, v1, v2 +; GFX9-SDAG-NEXT: v_cndmask_b32_e64 v1, 0, 1, vcc +; GFX9-SDAG-NEXT: v_ldexp_f16_e32 v0, v0, v1 +; GFX9-SDAG-NEXT: s_setpc_b64 s[30:31] +; +; GFX9-GISEL-LABEL: fmul_select_f16_test1: +; GFX9-GISEL: ; %bb.0: +; GFX9-GISEL-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX9-GISEL-NEXT: v_cmp_eq_u32_e32 vcc, v1, v2 +; GFX9-GISEL-NEXT: v_cndmask_b32_e64 v1, 0, 1, vcc +; GFX9-GISEL-NEXT: v_mov_b32_e32 v2, 0xffff8000 +; GFX9-GISEL-NEXT: v_mov_b32_e32 v3, 0x7fff +; GFX9-GISEL-NEXT: v_med3_i32 v1, v1, v2, v3 +; GFX9-GISEL-NEXT: v_ldexp_f16_e32 v0, v0, v1 +; GFX9-GISEL-NEXT: s_setpc_b64 s[30:31] +; +; GFX10-SDAG-LABEL: 
fmul_select_f16_test1: +; GFX10-SDAG: ; %bb.0: +; GFX10-SDAG-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX10-SDAG-NEXT: v_cmp_eq_u32_e32 vcc_lo, v1, v2 +; GFX10-SDAG-NEXT: v_cndmask_b32_e64 v1, 0, 1, vcc_lo +; GFX10-SDAG-NEXT: v_ldexp_f16_e32 v0, v0, v1 +; GFX10-SDAG-NEXT: s_setpc_b64 s[30:31] +; +; GFX10-GISEL-LABEL: fmul_select_f16_test1: +; GFX10-GISEL: ; %bb.0: +; GFX10-GISEL-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX10-GISEL-NEXT: v_cmp_eq_u32_e32 vcc_lo, v1, v2 +; GFX10-GISEL-NEXT: v_mov_b32_e32 v2, 0x7fff +; GFX10-GISEL-NEXT: v_cndmask_b32_e64 v1, 0, 1, vcc_lo +; GFX10-GISEL-NEXT: v_med3_i32 v1, 0xffff8000, v1, v2 +; GFX10-GISEL-NEXT: v_ldexp_f16_e32 v0, v0, v1 +; GFX10-GISEL-NEXT: s_setpc_b64 s[30:31] +; +; GFX11-SDAG-LABEL: fmul_select_f16_test1: +; GFX11-SDAG: ; %bb.0: +; GFX11-SDAG-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX11-SDAG-NEXT: v_cmp_eq_u32_e32 vcc_lo, v1, v2 +; GFX11-SDAG-NEXT: v_cndmask_b32_e64 v1, 0, 1, vcc_lo +; GFX11-SDAG-NEXT: s_delay_alu instid0(VALU_DEP_1) +; GFX11-SDAG-NEXT: v_ldexp_f16_e32 v0, v0, v1 +; GFX11-SDAG-NEXT: s_setpc_b64 s[30:31] +; +; GFX11-GISEL-LABEL: fmul_select_f16_test1: +; GFX11-GISEL: ; %bb.0: +; GFX11-GISEL-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX11-GISEL-NEXT: v_cmp_eq_u32_e32 vcc_lo, v1, v2 +; GFX11-GISEL-NEXT: v_mov_b32_e32 v2, 0x7fff +; GFX11-GISEL-NEXT: v_cndmask_b32_e64 v1, 0, 1, vcc_lo +; GFX11-GISEL-NEXT: s_delay_alu instid0(VALU_DEP_1) | instskip(NEXT) | instid1(VALU_DEP_1) +; GFX11-GISEL-NEXT: v_med3_i32 v1, 0xffff8000, v1, v2 +; GFX11-GISEL-NEXT: v_ldexp_f16_e32 v0, v0, v1 +; GFX11-GISEL-NEXT: s_setpc_b64 s[30:31] %bool = icmp eq i32 %bool.arg1, %bool.arg2 %y = select i1 %bool, half 2.000000e+00, half 1.000000e+00 %ldexp = fmul half %x, %y @@ -1241,47 +1660,89 @@ define half @fmul_select_f16_test1(half %x, i32 %bool.arg1, i32 %bool.arg2) { } define half @fmul_select_f16_test2(half %x, i32 %bool.arg1, i32 %bool.arg2) { -; GFX7-LABEL: fmul_select_f16_test2: -; GFX7: ; 
%bb.0: -; GFX7-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX7-NEXT: v_cvt_f16_f32_e32 v0, v0 -; GFX7-NEXT: v_cmp_eq_u32_e32 vcc, v1, v2 -; GFX7-NEXT: v_cndmask_b32_e64 v1, 0, -1, vcc -; GFX7-NEXT: v_cvt_f32_f16_e32 v0, v0 -; GFX7-NEXT: v_ldexp_f32_e32 v0, v0, v1 -; GFX7-NEXT: s_setpc_b64 s[30:31] -; -; GFX9-LABEL: fmul_select_f16_test2: -; GFX9: ; %bb.0: -; GFX9-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX9-NEXT: v_cmp_eq_u32_e32 vcc, v1, v2 -; GFX9-NEXT: v_cndmask_b32_e64 v1, 0, -1, vcc -; GFX9-NEXT: s_movk_i32 s4, 0x8000 -; GFX9-NEXT: v_mov_b32_e32 v2, 0x7fff -; GFX9-NEXT: v_med3_i32 v1, v1, s4, v2 -; GFX9-NEXT: v_ldexp_f16_e32 v0, v0, v1 -; GFX9-NEXT: s_setpc_b64 s[30:31] -; -; GFX1030-LABEL: fmul_select_f16_test2: -; GFX1030: ; %bb.0: -; GFX1030-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX1030-NEXT: v_cmp_eq_u32_e32 vcc_lo, v1, v2 -; GFX1030-NEXT: s_movk_i32 s4, 0x8000 -; GFX1030-NEXT: v_cndmask_b32_e64 v1, 0, -1, vcc_lo -; GFX1030-NEXT: v_med3_i32 v1, v1, s4, 0x7fff -; GFX1030-NEXT: v_ldexp_f16_e32 v0, v0, v1 -; GFX1030-NEXT: s_setpc_b64 s[30:31] -; -; GFX1100-LABEL: fmul_select_f16_test2: -; GFX1100: ; %bb.0: -; GFX1100-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX1100-NEXT: v_cmp_eq_u32_e32 vcc_lo, v1, v2 -; GFX1100-NEXT: s_movk_i32 s0, 0x8000 -; GFX1100-NEXT: v_cndmask_b32_e64 v1, 0, -1, vcc_lo -; GFX1100-NEXT: s_delay_alu instid0(VALU_DEP_1) | instskip(NEXT) | instid1(VALU_DEP_1) -; GFX1100-NEXT: v_med3_i32 v1, v1, s0, 0x7fff -; GFX1100-NEXT: v_ldexp_f16_e32 v0, v0, v1 -; GFX1100-NEXT: s_setpc_b64 s[30:31] +; GFX7-SDAG-LABEL: fmul_select_f16_test2: +; GFX7-SDAG: ; %bb.0: +; GFX7-SDAG-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX7-SDAG-NEXT: v_cvt_f16_f32_e32 v0, v0 +; GFX7-SDAG-NEXT: v_cmp_eq_u32_e32 vcc, v1, v2 +; GFX7-SDAG-NEXT: v_cndmask_b32_e64 v1, 0, -1, vcc +; GFX7-SDAG-NEXT: v_cvt_f32_f16_e32 v0, v0 +; GFX7-SDAG-NEXT: v_ldexp_f32_e32 v0, v0, v1 +; GFX7-SDAG-NEXT: s_setpc_b64 s[30:31] +; +; GFX7-GISEL-LABEL: 
fmul_select_f16_test2: +; GFX7-GISEL: ; %bb.0: +; GFX7-GISEL-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX7-GISEL-NEXT: v_cvt_f32_f16_e32 v0, v0 +; GFX7-GISEL-NEXT: v_cmp_eq_u32_e32 vcc, v1, v2 +; GFX7-GISEL-NEXT: v_cndmask_b32_e64 v1, 0, -1, vcc +; GFX7-GISEL-NEXT: v_ldexp_f32_e32 v0, v0, v1 +; GFX7-GISEL-NEXT: v_cvt_f16_f32_e32 v0, v0 +; GFX7-GISEL-NEXT: s_setpc_b64 s[30:31] +; +; GFX9-SDAG-LABEL: fmul_select_f16_test2: +; GFX9-SDAG: ; %bb.0: +; GFX9-SDAG-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX9-SDAG-NEXT: v_cmp_eq_u32_e32 vcc, v1, v2 +; GFX9-SDAG-NEXT: v_cndmask_b32_e64 v1, 0, -1, vcc +; GFX9-SDAG-NEXT: s_movk_i32 s4, 0x8000 +; GFX9-SDAG-NEXT: v_mov_b32_e32 v2, 0x7fff +; GFX9-SDAG-NEXT: v_med3_i32 v1, v1, s4, v2 +; GFX9-SDAG-NEXT: v_ldexp_f16_e32 v0, v0, v1 +; GFX9-SDAG-NEXT: s_setpc_b64 s[30:31] +; +; GFX9-GISEL-LABEL: fmul_select_f16_test2: +; GFX9-GISEL: ; %bb.0: +; GFX9-GISEL-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX9-GISEL-NEXT: v_cmp_eq_u32_e32 vcc, v1, v2 +; GFX9-GISEL-NEXT: v_cndmask_b32_e64 v1, 0, -1, vcc +; GFX9-GISEL-NEXT: v_mov_b32_e32 v2, 0xffff8000 +; GFX9-GISEL-NEXT: v_mov_b32_e32 v3, 0x7fff +; GFX9-GISEL-NEXT: v_med3_i32 v1, v1, v2, v3 +; GFX9-GISEL-NEXT: v_ldexp_f16_e32 v0, v0, v1 +; GFX9-GISEL-NEXT: s_setpc_b64 s[30:31] +; +; GFX10-SDAG-LABEL: fmul_select_f16_test2: +; GFX10-SDAG: ; %bb.0: +; GFX10-SDAG-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX10-SDAG-NEXT: v_cmp_eq_u32_e32 vcc_lo, v1, v2 +; GFX10-SDAG-NEXT: s_movk_i32 s4, 0x8000 +; GFX10-SDAG-NEXT: v_cndmask_b32_e64 v1, 0, -1, vcc_lo +; GFX10-SDAG-NEXT: v_med3_i32 v1, v1, s4, 0x7fff +; GFX10-SDAG-NEXT: v_ldexp_f16_e32 v0, v0, v1 +; GFX10-SDAG-NEXT: s_setpc_b64 s[30:31] +; +; GFX10-GISEL-LABEL: fmul_select_f16_test2: +; GFX10-GISEL: ; %bb.0: +; GFX10-GISEL-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX10-GISEL-NEXT: v_cmp_eq_u32_e32 vcc_lo, v1, v2 +; GFX10-GISEL-NEXT: v_mov_b32_e32 v2, 0x7fff +; GFX10-GISEL-NEXT: v_cndmask_b32_e64 v1, 0, -1, 
vcc_lo +; GFX10-GISEL-NEXT: v_med3_i32 v1, 0xffff8000, v1, v2 +; GFX10-GISEL-NEXT: v_ldexp_f16_e32 v0, v0, v1 +; GFX10-GISEL-NEXT: s_setpc_b64 s[30:31] +; +; GFX11-SDAG-LABEL: fmul_select_f16_test2: +; GFX11-SDAG: ; %bb.0: +; GFX11-SDAG-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX11-SDAG-NEXT: v_cmp_eq_u32_e32 vcc_lo, v1, v2 +; GFX11-SDAG-NEXT: s_movk_i32 s0, 0x8000 +; GFX11-SDAG-NEXT: v_cndmask_b32_e64 v1, 0, -1, vcc_lo +; GFX11-SDAG-NEXT: s_delay_alu instid0(VALU_DEP_1) | instskip(NEXT) | instid1(VALU_DEP_1) +; GFX11-SDAG-NEXT: v_med3_i32 v1, v1, s0, 0x7fff +; GFX11-SDAG-NEXT: v_ldexp_f16_e32 v0, v0, v1 +; GFX11-SDAG-NEXT: s_setpc_b64 s[30:31] +; +; GFX11-GISEL-LABEL: fmul_select_f16_test2: +; GFX11-GISEL: ; %bb.0: +; GFX11-GISEL-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX11-GISEL-NEXT: v_cmp_eq_u32_e32 vcc_lo, v1, v2 +; GFX11-GISEL-NEXT: v_mov_b32_e32 v2, 0x7fff +; GFX11-GISEL-NEXT: v_cndmask_b32_e64 v1, 0, -1, vcc_lo +; GFX11-GISEL-NEXT: s_delay_alu instid0(VALU_DEP_1) | instskip(NEXT) | instid1(VALU_DEP_1) +; GFX11-GISEL-NEXT: v_med3_i32 v1, 0xffff8000, v1, v2 +; GFX11-GISEL-NEXT: v_ldexp_f16_e32 v0, v0, v1 +; GFX11-GISEL-NEXT: s_setpc_b64 s[30:31] %bool = icmp eq i32 %bool.arg1, %bool.arg2 %y = select i1 %bool, half 5.000000e-01, half 1.000000e+00 %ldexp = fmul half %x, %y @@ -1289,59 +1750,126 @@ define half @fmul_select_f16_test2(half %x, i32 %bool.arg1, i32 %bool.arg2) { } define <2 x half> @fmul_select_v2f16_test3(<2 x half> %x, <2 x i32> %bool.arg1, <2 x i32> %bool.arg2) { -; GFX7-LABEL: fmul_select_v2f16_test3: -; GFX7: ; %bb.0: -; GFX7-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX7-NEXT: v_cvt_f16_f32_e32 v1, v1 -; GFX7-NEXT: v_cvt_f16_f32_e32 v0, v0 -; GFX7-NEXT: v_cmp_eq_u32_e32 vcc, v3, v5 -; GFX7-NEXT: v_cndmask_b32_e64 v3, 1.0, 2.0, vcc -; GFX7-NEXT: v_cvt_f32_f16_e32 v1, v1 -; GFX7-NEXT: v_cvt_f32_f16_e32 v0, v0 -; GFX7-NEXT: v_cmp_eq_u32_e32 vcc, v2, v4 -; GFX7-NEXT: v_cndmask_b32_e64 v2, 1.0, 2.0, vcc -; GFX7-NEXT: 
v_mul_f32_e32 v1, v1, v3 -; GFX7-NEXT: v_mul_f32_e32 v0, v0, v2 -; GFX7-NEXT: s_setpc_b64 s[30:31] -; -; GFX9-LABEL: fmul_select_v2f16_test3: -; GFX9: ; %bb.0: -; GFX9-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX9-NEXT: v_mov_b32_e32 v5, 0x3c00 -; GFX9-NEXT: v_mov_b32_e32 v6, 0x4000 -; GFX9-NEXT: v_cmp_eq_u32_e32 vcc, v2, v4 -; GFX9-NEXT: v_cndmask_b32_e32 v2, v5, v6, vcc -; GFX9-NEXT: v_cmp_eq_u32_e32 vcc, v1, v3 -; GFX9-NEXT: v_cndmask_b32_e32 v1, v5, v6, vcc -; GFX9-NEXT: v_pack_b32_f16 v1, v1, v2 -; GFX9-NEXT: v_pk_mul_f16 v0, v0, v1 -; GFX9-NEXT: s_setpc_b64 s[30:31] -; -; GFX1030-LABEL: fmul_select_v2f16_test3: -; GFX1030: ; %bb.0: -; GFX1030-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX1030-NEXT: v_mov_b32_e32 v5, 0x4000 -; GFX1030-NEXT: v_cmp_eq_u32_e32 vcc_lo, v2, v4 -; GFX1030-NEXT: v_cndmask_b32_e32 v2, 0x3c00, v5, vcc_lo -; GFX1030-NEXT: v_cmp_eq_u32_e32 vcc_lo, v1, v3 -; GFX1030-NEXT: v_cndmask_b32_e32 v1, 0x3c00, v5, vcc_lo -; GFX1030-NEXT: v_pack_b32_f16 v1, v1, v2 -; GFX1030-NEXT: v_pk_mul_f16 v0, v0, v1 -; GFX1030-NEXT: s_setpc_b64 s[30:31] -; -; GFX1100-LABEL: fmul_select_v2f16_test3: -; GFX1100: ; %bb.0: -; GFX1100-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX1100-NEXT: v_mov_b32_e32 v5, 0x4000 -; GFX1100-NEXT: v_cmp_eq_u32_e32 vcc_lo, v2, v4 -; GFX1100-NEXT: s_delay_alu instid0(VALU_DEP_2) | instskip(SKIP_2) | instid1(VALU_DEP_1) -; GFX1100-NEXT: v_cndmask_b32_e32 v2, 0x3c00, v5, vcc_lo -; GFX1100-NEXT: v_cmp_eq_u32_e32 vcc_lo, v1, v3 -; GFX1100-NEXT: v_cndmask_b32_e32 v1, 0x3c00, v5, vcc_lo -; GFX1100-NEXT: v_pack_b32_f16 v1, v1, v2 -; GFX1100-NEXT: s_delay_alu instid0(VALU_DEP_1) -; GFX1100-NEXT: v_pk_mul_f16 v0, v0, v1 -; GFX1100-NEXT: s_setpc_b64 s[30:31] +; GFX7-SDAG-LABEL: fmul_select_v2f16_test3: +; GFX7-SDAG: ; %bb.0: +; GFX7-SDAG-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX7-SDAG-NEXT: v_cvt_f16_f32_e32 v1, v1 +; GFX7-SDAG-NEXT: v_cvt_f16_f32_e32 v0, v0 +; GFX7-SDAG-NEXT: v_cmp_eq_u32_e32 vcc, v3, v5 +; 
GFX7-SDAG-NEXT: v_cndmask_b32_e64 v3, 1.0, 2.0, vcc +; GFX7-SDAG-NEXT: v_cvt_f32_f16_e32 v1, v1 +; GFX7-SDAG-NEXT: v_cvt_f32_f16_e32 v0, v0 +; GFX7-SDAG-NEXT: v_cmp_eq_u32_e32 vcc, v2, v4 +; GFX7-SDAG-NEXT: v_cndmask_b32_e64 v2, 1.0, 2.0, vcc +; GFX7-SDAG-NEXT: v_mul_f32_e32 v1, v1, v3 +; GFX7-SDAG-NEXT: v_mul_f32_e32 v0, v0, v2 +; GFX7-SDAG-NEXT: s_setpc_b64 s[30:31] +; +; GFX7-GISEL-LABEL: fmul_select_v2f16_test3: +; GFX7-GISEL: ; %bb.0: +; GFX7-GISEL-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX7-GISEL-NEXT: v_cvt_f32_f16_e32 v0, v0 +; GFX7-GISEL-NEXT: v_cvt_f32_f16_e32 v1, v1 +; GFX7-GISEL-NEXT: v_cmp_eq_u32_e32 vcc, v2, v4 +; GFX7-GISEL-NEXT: v_cndmask_b32_e64 v2, 0, 1, vcc +; GFX7-GISEL-NEXT: v_cmp_eq_u32_e32 vcc, v3, v5 +; GFX7-GISEL-NEXT: v_cndmask_b32_e64 v3, 0, 1, vcc +; GFX7-GISEL-NEXT: v_ldexp_f32_e32 v0, v0, v2 +; GFX7-GISEL-NEXT: v_ldexp_f32_e32 v1, v1, v3 +; GFX7-GISEL-NEXT: v_cvt_f16_f32_e32 v0, v0 +; GFX7-GISEL-NEXT: v_cvt_f16_f32_e32 v1, v1 +; GFX7-GISEL-NEXT: s_setpc_b64 s[30:31] +; +; GFX9-SDAG-LABEL: fmul_select_v2f16_test3: +; GFX9-SDAG: ; %bb.0: +; GFX9-SDAG-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX9-SDAG-NEXT: v_mov_b32_e32 v5, 0x3c00 +; GFX9-SDAG-NEXT: v_mov_b32_e32 v6, 0x4000 +; GFX9-SDAG-NEXT: v_cmp_eq_u32_e32 vcc, v2, v4 +; GFX9-SDAG-NEXT: v_cndmask_b32_e32 v2, v5, v6, vcc +; GFX9-SDAG-NEXT: v_cmp_eq_u32_e32 vcc, v1, v3 +; GFX9-SDAG-NEXT: v_cndmask_b32_e32 v1, v5, v6, vcc +; GFX9-SDAG-NEXT: v_pack_b32_f16 v1, v1, v2 +; GFX9-SDAG-NEXT: v_pk_mul_f16 v0, v0, v1 +; GFX9-SDAG-NEXT: s_setpc_b64 s[30:31] +; +; GFX9-GISEL-LABEL: fmul_select_v2f16_test3: +; GFX9-GISEL: ; %bb.0: +; GFX9-GISEL-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX9-GISEL-NEXT: v_cmp_eq_u32_e32 vcc, v1, v3 +; GFX9-GISEL-NEXT: v_cndmask_b32_e64 v1, 0, 1, vcc +; GFX9-GISEL-NEXT: v_cmp_eq_u32_e32 vcc, v2, v4 +; GFX9-GISEL-NEXT: v_cndmask_b32_e64 v2, 0, 1, vcc +; GFX9-GISEL-NEXT: v_mov_b32_e32 v3, 0xffff8000 +; GFX9-GISEL-NEXT: v_mov_b32_e32 v4, 0x7fff 
+; GFX9-GISEL-NEXT: v_med3_i32 v1, v1, v3, v4 +; GFX9-GISEL-NEXT: v_med3_i32 v2, v2, v3, v4 +; GFX9-GISEL-NEXT: v_ldexp_f16_e32 v1, v0, v1 +; GFX9-GISEL-NEXT: v_ldexp_f16_sdwa v0, v0, v2 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:WORD_1 src1_sel:DWORD +; GFX9-GISEL-NEXT: v_lshl_or_b32 v0, v0, 16, v1 +; GFX9-GISEL-NEXT: s_setpc_b64 s[30:31] +; +; GFX10-SDAG-LABEL: fmul_select_v2f16_test3: +; GFX10-SDAG: ; %bb.0: +; GFX10-SDAG-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX10-SDAG-NEXT: v_mov_b32_e32 v5, 0x4000 +; GFX10-SDAG-NEXT: v_cmp_eq_u32_e32 vcc_lo, v2, v4 +; GFX10-SDAG-NEXT: v_cndmask_b32_e32 v2, 0x3c00, v5, vcc_lo +; GFX10-SDAG-NEXT: v_cmp_eq_u32_e32 vcc_lo, v1, v3 +; GFX10-SDAG-NEXT: v_cndmask_b32_e32 v1, 0x3c00, v5, vcc_lo +; GFX10-SDAG-NEXT: v_pack_b32_f16 v1, v1, v2 +; GFX10-SDAG-NEXT: v_pk_mul_f16 v0, v0, v1 +; GFX10-SDAG-NEXT: s_setpc_b64 s[30:31] +; +; GFX10-GISEL-LABEL: fmul_select_v2f16_test3: +; GFX10-GISEL: ; %bb.0: +; GFX10-GISEL-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX10-GISEL-NEXT: v_cmp_eq_u32_e32 vcc_lo, v1, v3 +; GFX10-GISEL-NEXT: v_mov_b32_e32 v3, 0x7fff +; GFX10-GISEL-NEXT: v_cndmask_b32_e64 v1, 0, 1, vcc_lo +; GFX10-GISEL-NEXT: v_cmp_eq_u32_e32 vcc_lo, v2, v4 +; GFX10-GISEL-NEXT: v_med3_i32 v1, 0xffff8000, v1, v3 +; GFX10-GISEL-NEXT: v_cndmask_b32_e64 v2, 0, 1, vcc_lo +; GFX10-GISEL-NEXT: v_ldexp_f16_e32 v1, v0, v1 +; GFX10-GISEL-NEXT: v_med3_i32 v2, 0xffff8000, v2, v3 +; GFX10-GISEL-NEXT: v_and_b32_e32 v1, 0xffff, v1 +; GFX10-GISEL-NEXT: v_ldexp_f16_sdwa v0, v0, v2 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:WORD_1 src1_sel:DWORD +; GFX10-GISEL-NEXT: v_lshl_or_b32 v0, v0, 16, v1 +; GFX10-GISEL-NEXT: s_setpc_b64 s[30:31] +; +; GFX11-SDAG-LABEL: fmul_select_v2f16_test3: +; GFX11-SDAG: ; %bb.0: +; GFX11-SDAG-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX11-SDAG-NEXT: v_mov_b32_e32 v5, 0x4000 +; GFX11-SDAG-NEXT: v_cmp_eq_u32_e32 vcc_lo, v2, v4 +; GFX11-SDAG-NEXT: s_delay_alu instid0(VALU_DEP_2) | instskip(SKIP_2) | 
instid1(VALU_DEP_1) +; GFX11-SDAG-NEXT: v_cndmask_b32_e32 v2, 0x3c00, v5, vcc_lo +; GFX11-SDAG-NEXT: v_cmp_eq_u32_e32 vcc_lo, v1, v3 +; GFX11-SDAG-NEXT: v_cndmask_b32_e32 v1, 0x3c00, v5, vcc_lo +; GFX11-SDAG-NEXT: v_pack_b32_f16 v1, v1, v2 +; GFX11-SDAG-NEXT: s_delay_alu instid0(VALU_DEP_1) +; GFX11-SDAG-NEXT: v_pk_mul_f16 v0, v0, v1 +; GFX11-SDAG-NEXT: s_setpc_b64 s[30:31] +; +; GFX11-GISEL-LABEL: fmul_select_v2f16_test3: +; GFX11-GISEL: ; %bb.0: +; GFX11-GISEL-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX11-GISEL-NEXT: v_cmp_eq_u32_e32 vcc_lo, v1, v3 +; GFX11-GISEL-NEXT: v_mov_b32_e32 v3, 0x7fff +; GFX11-GISEL-NEXT: v_cndmask_b32_e64 v1, 0, 1, vcc_lo +; GFX11-GISEL-NEXT: v_cmp_eq_u32_e32 vcc_lo, v2, v4 +; GFX11-GISEL-NEXT: v_lshrrev_b32_e32 v4, 16, v0 +; GFX11-GISEL-NEXT: s_delay_alu instid0(VALU_DEP_3) | instskip(SKIP_1) | instid1(VALU_DEP_2) +; GFX11-GISEL-NEXT: v_med3_i32 v1, 0xffff8000, v1, v3 +; GFX11-GISEL-NEXT: v_cndmask_b32_e64 v2, 0, 1, vcc_lo +; GFX11-GISEL-NEXT: v_ldexp_f16_e32 v0, v0, v1 +; GFX11-GISEL-NEXT: s_delay_alu instid0(VALU_DEP_2) | instskip(NEXT) | instid1(VALU_DEP_2) +; GFX11-GISEL-NEXT: v_med3_i32 v2, 0xffff8000, v2, v3 +; GFX11-GISEL-NEXT: v_and_b32_e32 v0, 0xffff, v0 +; GFX11-GISEL-NEXT: s_delay_alu instid0(VALU_DEP_2) | instskip(NEXT) | instid1(VALU_DEP_1) +; GFX11-GISEL-NEXT: v_ldexp_f16_e32 v1, v4, v2 +; GFX11-GISEL-NEXT: v_lshl_or_b32 v0, v1, 16, v0 +; GFX11-GISEL-NEXT: s_setpc_b64 s[30:31] %bool = icmp eq <2 x i32> %bool.arg1, %bool.arg2 %y = select <2 x i1> %bool, <2 x half> , <2 x half> %ldexp = fmul <2 x half> %x, %y @@ -1349,59 +1877,126 @@ define <2 x half> @fmul_select_v2f16_test3(<2 x half> %x, <2 x i32> %bool.arg1, } define <2 x half> @fmul_select_v2f16_test4(<2 x half> %x, <2 x i32> %bool.arg1, <2 x i32> %bool.arg2) { -; GFX7-LABEL: fmul_select_v2f16_test4: -; GFX7: ; %bb.0: -; GFX7-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX7-NEXT: v_cvt_f16_f32_e32 v1, v1 -; GFX7-NEXT: v_cvt_f16_f32_e32 v0, v0 -; 
GFX7-NEXT: v_cmp_eq_u32_e32 vcc, v3, v5 -; GFX7-NEXT: v_cndmask_b32_e64 v3, 1.0, 0.5, vcc -; GFX7-NEXT: v_cvt_f32_f16_e32 v1, v1 -; GFX7-NEXT: v_cvt_f32_f16_e32 v0, v0 -; GFX7-NEXT: v_cmp_eq_u32_e32 vcc, v2, v4 -; GFX7-NEXT: v_cndmask_b32_e64 v2, 1.0, 0.5, vcc -; GFX7-NEXT: v_mul_f32_e32 v1, v1, v3 -; GFX7-NEXT: v_mul_f32_e32 v0, v0, v2 -; GFX7-NEXT: s_setpc_b64 s[30:31] -; -; GFX9-LABEL: fmul_select_v2f16_test4: -; GFX9: ; %bb.0: -; GFX9-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX9-NEXT: v_mov_b32_e32 v5, 0x3c00 -; GFX9-NEXT: v_mov_b32_e32 v6, 0x3800 -; GFX9-NEXT: v_cmp_eq_u32_e32 vcc, v2, v4 -; GFX9-NEXT: v_cndmask_b32_e32 v2, v5, v6, vcc -; GFX9-NEXT: v_cmp_eq_u32_e32 vcc, v1, v3 -; GFX9-NEXT: v_cndmask_b32_e32 v1, v5, v6, vcc -; GFX9-NEXT: v_pack_b32_f16 v1, v1, v2 -; GFX9-NEXT: v_pk_mul_f16 v0, v0, v1 -; GFX9-NEXT: s_setpc_b64 s[30:31] -; -; GFX1030-LABEL: fmul_select_v2f16_test4: -; GFX1030: ; %bb.0: -; GFX1030-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX1030-NEXT: v_mov_b32_e32 v5, 0x3800 -; GFX1030-NEXT: v_cmp_eq_u32_e32 vcc_lo, v2, v4 -; GFX1030-NEXT: v_cndmask_b32_e32 v2, 0x3c00, v5, vcc_lo -; GFX1030-NEXT: v_cmp_eq_u32_e32 vcc_lo, v1, v3 -; GFX1030-NEXT: v_cndmask_b32_e32 v1, 0x3c00, v5, vcc_lo -; GFX1030-NEXT: v_pack_b32_f16 v1, v1, v2 -; GFX1030-NEXT: v_pk_mul_f16 v0, v0, v1 -; GFX1030-NEXT: s_setpc_b64 s[30:31] -; -; GFX1100-LABEL: fmul_select_v2f16_test4: -; GFX1100: ; %bb.0: -; GFX1100-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX1100-NEXT: v_mov_b32_e32 v5, 0x3800 -; GFX1100-NEXT: v_cmp_eq_u32_e32 vcc_lo, v2, v4 -; GFX1100-NEXT: s_delay_alu instid0(VALU_DEP_2) | instskip(SKIP_2) | instid1(VALU_DEP_1) -; GFX1100-NEXT: v_cndmask_b32_e32 v2, 0x3c00, v5, vcc_lo -; GFX1100-NEXT: v_cmp_eq_u32_e32 vcc_lo, v1, v3 -; GFX1100-NEXT: v_cndmask_b32_e32 v1, 0x3c00, v5, vcc_lo -; GFX1100-NEXT: v_pack_b32_f16 v1, v1, v2 -; GFX1100-NEXT: s_delay_alu instid0(VALU_DEP_1) -; GFX1100-NEXT: v_pk_mul_f16 v0, v0, v1 -; GFX1100-NEXT: s_setpc_b64 
s[30:31] +; GFX7-SDAG-LABEL: fmul_select_v2f16_test4: +; GFX7-SDAG: ; %bb.0: +; GFX7-SDAG-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX7-SDAG-NEXT: v_cvt_f16_f32_e32 v1, v1 +; GFX7-SDAG-NEXT: v_cvt_f16_f32_e32 v0, v0 +; GFX7-SDAG-NEXT: v_cmp_eq_u32_e32 vcc, v3, v5 +; GFX7-SDAG-NEXT: v_cndmask_b32_e64 v3, 1.0, 0.5, vcc +; GFX7-SDAG-NEXT: v_cvt_f32_f16_e32 v1, v1 +; GFX7-SDAG-NEXT: v_cvt_f32_f16_e32 v0, v0 +; GFX7-SDAG-NEXT: v_cmp_eq_u32_e32 vcc, v2, v4 +; GFX7-SDAG-NEXT: v_cndmask_b32_e64 v2, 1.0, 0.5, vcc +; GFX7-SDAG-NEXT: v_mul_f32_e32 v1, v1, v3 +; GFX7-SDAG-NEXT: v_mul_f32_e32 v0, v0, v2 +; GFX7-SDAG-NEXT: s_setpc_b64 s[30:31] +; +; GFX7-GISEL-LABEL: fmul_select_v2f16_test4: +; GFX7-GISEL: ; %bb.0: +; GFX7-GISEL-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX7-GISEL-NEXT: v_cvt_f32_f16_e32 v0, v0 +; GFX7-GISEL-NEXT: v_cvt_f32_f16_e32 v1, v1 +; GFX7-GISEL-NEXT: v_cmp_eq_u32_e32 vcc, v2, v4 +; GFX7-GISEL-NEXT: v_cndmask_b32_e64 v2, 0, -1, vcc +; GFX7-GISEL-NEXT: v_cmp_eq_u32_e32 vcc, v3, v5 +; GFX7-GISEL-NEXT: v_cndmask_b32_e64 v3, 0, -1, vcc +; GFX7-GISEL-NEXT: v_ldexp_f32_e32 v0, v0, v2 +; GFX7-GISEL-NEXT: v_ldexp_f32_e32 v1, v1, v3 +; GFX7-GISEL-NEXT: v_cvt_f16_f32_e32 v0, v0 +; GFX7-GISEL-NEXT: v_cvt_f16_f32_e32 v1, v1 +; GFX7-GISEL-NEXT: s_setpc_b64 s[30:31] +; +; GFX9-SDAG-LABEL: fmul_select_v2f16_test4: +; GFX9-SDAG: ; %bb.0: +; GFX9-SDAG-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX9-SDAG-NEXT: v_mov_b32_e32 v5, 0x3c00 +; GFX9-SDAG-NEXT: v_mov_b32_e32 v6, 0x3800 +; GFX9-SDAG-NEXT: v_cmp_eq_u32_e32 vcc, v2, v4 +; GFX9-SDAG-NEXT: v_cndmask_b32_e32 v2, v5, v6, vcc +; GFX9-SDAG-NEXT: v_cmp_eq_u32_e32 vcc, v1, v3 +; GFX9-SDAG-NEXT: v_cndmask_b32_e32 v1, v5, v6, vcc +; GFX9-SDAG-NEXT: v_pack_b32_f16 v1, v1, v2 +; GFX9-SDAG-NEXT: v_pk_mul_f16 v0, v0, v1 +; GFX9-SDAG-NEXT: s_setpc_b64 s[30:31] +; +; GFX9-GISEL-LABEL: fmul_select_v2f16_test4: +; GFX9-GISEL: ; %bb.0: +; GFX9-GISEL-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX9-GISEL-NEXT: 
v_cmp_eq_u32_e32 vcc, v1, v3 +; GFX9-GISEL-NEXT: v_cndmask_b32_e64 v1, 0, -1, vcc +; GFX9-GISEL-NEXT: v_cmp_eq_u32_e32 vcc, v2, v4 +; GFX9-GISEL-NEXT: v_cndmask_b32_e64 v2, 0, -1, vcc +; GFX9-GISEL-NEXT: v_mov_b32_e32 v3, 0xffff8000 +; GFX9-GISEL-NEXT: v_mov_b32_e32 v4, 0x7fff +; GFX9-GISEL-NEXT: v_med3_i32 v1, v1, v3, v4 +; GFX9-GISEL-NEXT: v_med3_i32 v2, v2, v3, v4 +; GFX9-GISEL-NEXT: v_ldexp_f16_e32 v1, v0, v1 +; GFX9-GISEL-NEXT: v_ldexp_f16_sdwa v0, v0, v2 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:WORD_1 src1_sel:DWORD +; GFX9-GISEL-NEXT: v_lshl_or_b32 v0, v0, 16, v1 +; GFX9-GISEL-NEXT: s_setpc_b64 s[30:31] +; +; GFX10-SDAG-LABEL: fmul_select_v2f16_test4: +; GFX10-SDAG: ; %bb.0: +; GFX10-SDAG-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX10-SDAG-NEXT: v_mov_b32_e32 v5, 0x3800 +; GFX10-SDAG-NEXT: v_cmp_eq_u32_e32 vcc_lo, v2, v4 +; GFX10-SDAG-NEXT: v_cndmask_b32_e32 v2, 0x3c00, v5, vcc_lo +; GFX10-SDAG-NEXT: v_cmp_eq_u32_e32 vcc_lo, v1, v3 +; GFX10-SDAG-NEXT: v_cndmask_b32_e32 v1, 0x3c00, v5, vcc_lo +; GFX10-SDAG-NEXT: v_pack_b32_f16 v1, v1, v2 +; GFX10-SDAG-NEXT: v_pk_mul_f16 v0, v0, v1 +; GFX10-SDAG-NEXT: s_setpc_b64 s[30:31] +; +; GFX10-GISEL-LABEL: fmul_select_v2f16_test4: +; GFX10-GISEL: ; %bb.0: +; GFX10-GISEL-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX10-GISEL-NEXT: v_cmp_eq_u32_e32 vcc_lo, v1, v3 +; GFX10-GISEL-NEXT: v_mov_b32_e32 v3, 0x7fff +; GFX10-GISEL-NEXT: v_cndmask_b32_e64 v1, 0, -1, vcc_lo +; GFX10-GISEL-NEXT: v_cmp_eq_u32_e32 vcc_lo, v2, v4 +; GFX10-GISEL-NEXT: v_med3_i32 v1, 0xffff8000, v1, v3 +; GFX10-GISEL-NEXT: v_cndmask_b32_e64 v2, 0, -1, vcc_lo +; GFX10-GISEL-NEXT: v_ldexp_f16_e32 v1, v0, v1 +; GFX10-GISEL-NEXT: v_med3_i32 v2, 0xffff8000, v2, v3 +; GFX10-GISEL-NEXT: v_and_b32_e32 v1, 0xffff, v1 +; GFX10-GISEL-NEXT: v_ldexp_f16_sdwa v0, v0, v2 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:WORD_1 src1_sel:DWORD +; GFX10-GISEL-NEXT: v_lshl_or_b32 v0, v0, 16, v1 +; GFX10-GISEL-NEXT: s_setpc_b64 s[30:31] +; +; GFX11-SDAG-LABEL: 
fmul_select_v2f16_test4: +; GFX11-SDAG: ; %bb.0: +; GFX11-SDAG-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX11-SDAG-NEXT: v_mov_b32_e32 v5, 0x3800 +; GFX11-SDAG-NEXT: v_cmp_eq_u32_e32 vcc_lo, v2, v4 +; GFX11-SDAG-NEXT: s_delay_alu instid0(VALU_DEP_2) | instskip(SKIP_2) | instid1(VALU_DEP_1) +; GFX11-SDAG-NEXT: v_cndmask_b32_e32 v2, 0x3c00, v5, vcc_lo +; GFX11-SDAG-NEXT: v_cmp_eq_u32_e32 vcc_lo, v1, v3 +; GFX11-SDAG-NEXT: v_cndmask_b32_e32 v1, 0x3c00, v5, vcc_lo +; GFX11-SDAG-NEXT: v_pack_b32_f16 v1, v1, v2 +; GFX11-SDAG-NEXT: s_delay_alu instid0(VALU_DEP_1) +; GFX11-SDAG-NEXT: v_pk_mul_f16 v0, v0, v1 +; GFX11-SDAG-NEXT: s_setpc_b64 s[30:31] +; +; GFX11-GISEL-LABEL: fmul_select_v2f16_test4: +; GFX11-GISEL: ; %bb.0: +; GFX11-GISEL-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX11-GISEL-NEXT: v_cmp_eq_u32_e32 vcc_lo, v1, v3 +; GFX11-GISEL-NEXT: v_mov_b32_e32 v3, 0x7fff +; GFX11-GISEL-NEXT: v_cndmask_b32_e64 v1, 0, -1, vcc_lo +; GFX11-GISEL-NEXT: v_cmp_eq_u32_e32 vcc_lo, v2, v4 +; GFX11-GISEL-NEXT: v_lshrrev_b32_e32 v4, 16, v0 +; GFX11-GISEL-NEXT: s_delay_alu instid0(VALU_DEP_3) | instskip(SKIP_1) | instid1(VALU_DEP_2) +; GFX11-GISEL-NEXT: v_med3_i32 v1, 0xffff8000, v1, v3 +; GFX11-GISEL-NEXT: v_cndmask_b32_e64 v2, 0, -1, vcc_lo +; GFX11-GISEL-NEXT: v_ldexp_f16_e32 v0, v0, v1 +; GFX11-GISEL-NEXT: s_delay_alu instid0(VALU_DEP_2) | instskip(NEXT) | instid1(VALU_DEP_2) +; GFX11-GISEL-NEXT: v_med3_i32 v2, 0xffff8000, v2, v3 +; GFX11-GISEL-NEXT: v_and_b32_e32 v0, 0xffff, v0 +; GFX11-GISEL-NEXT: s_delay_alu instid0(VALU_DEP_2) | instskip(NEXT) | instid1(VALU_DEP_1) +; GFX11-GISEL-NEXT: v_ldexp_f16_e32 v1, v4, v2 +; GFX11-GISEL-NEXT: v_lshl_or_b32 v0, v1, 16, v0 +; GFX11-GISEL-NEXT: s_setpc_b64 s[30:31] %bool = icmp eq <2 x i32> %bool.arg1, %bool.arg2 %y = select <2 x i1> %bool, <2 x half> , <2 x half> %ldexp = fmul <2 x half> %x, %y @@ -1409,15 +2004,25 @@ define <2 x half> @fmul_select_v2f16_test4(<2 x half> %x, <2 x i32> %bool.arg1, } define half 
@fmul_select_f16_test5(half %x, i32 %bool.arg1, i32 %bool.arg2) { -; GFX7-LABEL: fmul_select_f16_test5: -; GFX7: ; %bb.0: -; GFX7-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX7-NEXT: v_cvt_f16_f32_e32 v0, v0 -; GFX7-NEXT: v_cmp_eq_u32_e32 vcc, v1, v2 -; GFX7-NEXT: v_cndmask_b32_e64 v1, 3, 1, vcc -; GFX7-NEXT: v_cvt_f32_f16_e32 v0, v0 -; GFX7-NEXT: v_ldexp_f32_e32 v0, v0, v1 -; GFX7-NEXT: s_setpc_b64 s[30:31] +; GFX7-SDAG-LABEL: fmul_select_f16_test5: +; GFX7-SDAG: ; %bb.0: +; GFX7-SDAG-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX7-SDAG-NEXT: v_cvt_f16_f32_e32 v0, v0 +; GFX7-SDAG-NEXT: v_cmp_eq_u32_e32 vcc, v1, v2 +; GFX7-SDAG-NEXT: v_cndmask_b32_e64 v1, 3, 1, vcc +; GFX7-SDAG-NEXT: v_cvt_f32_f16_e32 v0, v0 +; GFX7-SDAG-NEXT: v_ldexp_f32_e32 v0, v0, v1 +; GFX7-SDAG-NEXT: s_setpc_b64 s[30:31] +; +; GFX7-GISEL-LABEL: fmul_select_f16_test5: +; GFX7-GISEL: ; %bb.0: +; GFX7-GISEL-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX7-GISEL-NEXT: v_cvt_f32_f16_e32 v0, v0 +; GFX7-GISEL-NEXT: v_cmp_eq_u32_e32 vcc, v1, v2 +; GFX7-GISEL-NEXT: v_cndmask_b32_e64 v1, 3, 1, vcc +; GFX7-GISEL-NEXT: v_ldexp_f32_e32 v0, v0, v1 +; GFX7-GISEL-NEXT: v_cvt_f16_f32_e32 v0, v0 +; GFX7-GISEL-NEXT: s_setpc_b64 s[30:31] ; ; GFX9-LABEL: fmul_select_f16_test5: ; GFX9: ; %bb.0: @@ -1427,22 +2032,22 @@ define half @fmul_select_f16_test5(half %x, i32 %bool.arg1, i32 %bool.arg2) { ; GFX9-NEXT: v_ldexp_f16_e32 v0, v0, v1 ; GFX9-NEXT: s_setpc_b64 s[30:31] ; -; GFX1030-LABEL: fmul_select_f16_test5: -; GFX1030: ; %bb.0: -; GFX1030-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX1030-NEXT: v_cmp_eq_u32_e32 vcc_lo, v1, v2 -; GFX1030-NEXT: v_cndmask_b32_e64 v1, 3, 1, vcc_lo -; GFX1030-NEXT: v_ldexp_f16_e32 v0, v0, v1 -; GFX1030-NEXT: s_setpc_b64 s[30:31] -; -; GFX1100-LABEL: fmul_select_f16_test5: -; GFX1100: ; %bb.0: -; GFX1100-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX1100-NEXT: v_cmp_eq_u32_e32 vcc_lo, v1, v2 -; GFX1100-NEXT: v_cndmask_b32_e64 v1, 3, 1, vcc_lo -; 
GFX1100-NEXT: s_delay_alu instid0(VALU_DEP_1) -; GFX1100-NEXT: v_ldexp_f16_e32 v0, v0, v1 -; GFX1100-NEXT: s_setpc_b64 s[30:31] +; GFX10-LABEL: fmul_select_f16_test5: +; GFX10: ; %bb.0: +; GFX10-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX10-NEXT: v_cmp_eq_u32_e32 vcc_lo, v1, v2 +; GFX10-NEXT: v_cndmask_b32_e64 v1, 3, 1, vcc_lo +; GFX10-NEXT: v_ldexp_f16_e32 v0, v0, v1 +; GFX10-NEXT: s_setpc_b64 s[30:31] +; +; GFX11-LABEL: fmul_select_f16_test5: +; GFX11: ; %bb.0: +; GFX11-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX11-NEXT: v_cmp_eq_u32_e32 vcc_lo, v1, v2 +; GFX11-NEXT: v_cndmask_b32_e64 v1, 3, 1, vcc_lo +; GFX11-NEXT: s_delay_alu instid0(VALU_DEP_1) +; GFX11-NEXT: v_ldexp_f16_e32 v0, v0, v1 +; GFX11-NEXT: s_setpc_b64 s[30:31] %bool = icmp eq i32 %bool.arg1, %bool.arg2 %y = select i1 %bool, half 2.000000e+00, half 8.000000e+00 %ldexp = fmul half %x, %y @@ -1450,46 +2055,88 @@ define half @fmul_select_f16_test5(half %x, i32 %bool.arg1, i32 %bool.arg2) { } define half @fmul_select_f16_test6(half %x, i32 %bool.arg1, i32 %bool.arg2) { -; GFX7-LABEL: fmul_select_f16_test6: -; GFX7: ; %bb.0: -; GFX7-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX7-NEXT: v_cvt_f16_f32_e32 v0, v0 -; GFX7-NEXT: v_mov_b32_e32 v3, 0x40400000 -; GFX7-NEXT: v_mov_b32_e32 v4, 0xc1000000 -; GFX7-NEXT: v_cmp_eq_u32_e32 vcc, v1, v2 -; GFX7-NEXT: v_cvt_f32_f16_e32 v0, v0 -; GFX7-NEXT: v_cndmask_b32_e32 v1, v3, v4, vcc -; GFX7-NEXT: v_mul_f32_e32 v0, v0, v1 -; GFX7-NEXT: s_setpc_b64 s[30:31] -; -; GFX9-LABEL: fmul_select_f16_test6: -; GFX9: ; %bb.0: -; GFX9-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX9-NEXT: v_mov_b32_e32 v3, 0x4200 -; GFX9-NEXT: v_mov_b32_e32 v4, 0xc800 -; GFX9-NEXT: v_cmp_eq_u32_e32 vcc, v1, v2 -; GFX9-NEXT: v_cndmask_b32_e32 v1, v3, v4, vcc -; GFX9-NEXT: v_mul_f16_e32 v0, v0, v1 -; GFX9-NEXT: s_setpc_b64 s[30:31] -; -; GFX1030-LABEL: fmul_select_f16_test6: -; GFX1030: ; %bb.0: -; GFX1030-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX1030-NEXT: 
v_mov_b32_e32 v3, 0xc800 -; GFX1030-NEXT: v_cmp_eq_u32_e32 vcc_lo, v1, v2 -; GFX1030-NEXT: v_cndmask_b32_e32 v1, 0x4200, v3, vcc_lo -; GFX1030-NEXT: v_mul_f16_e32 v0, v0, v1 -; GFX1030-NEXT: s_setpc_b64 s[30:31] -; -; GFX1100-LABEL: fmul_select_f16_test6: -; GFX1100: ; %bb.0: -; GFX1100-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX1100-NEXT: v_mov_b32_e32 v3, 0xc800 -; GFX1100-NEXT: v_cmp_eq_u32_e32 vcc_lo, v1, v2 -; GFX1100-NEXT: s_delay_alu instid0(VALU_DEP_2) | instskip(NEXT) | instid1(VALU_DEP_1) -; GFX1100-NEXT: v_cndmask_b32_e32 v1, 0x4200, v3, vcc_lo -; GFX1100-NEXT: v_mul_f16_e32 v0, v0, v1 -; GFX1100-NEXT: s_setpc_b64 s[30:31] +; GFX7-SDAG-LABEL: fmul_select_f16_test6: +; GFX7-SDAG: ; %bb.0: +; GFX7-SDAG-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX7-SDAG-NEXT: v_cvt_f16_f32_e32 v0, v0 +; GFX7-SDAG-NEXT: v_mov_b32_e32 v3, 0x40400000 +; GFX7-SDAG-NEXT: v_mov_b32_e32 v4, 0xc1000000 +; GFX7-SDAG-NEXT: v_cmp_eq_u32_e32 vcc, v1, v2 +; GFX7-SDAG-NEXT: v_cvt_f32_f16_e32 v0, v0 +; GFX7-SDAG-NEXT: v_cndmask_b32_e32 v1, v3, v4, vcc +; GFX7-SDAG-NEXT: v_mul_f32_e32 v0, v0, v1 +; GFX7-SDAG-NEXT: s_setpc_b64 s[30:31] +; +; GFX7-GISEL-LABEL: fmul_select_f16_test6: +; GFX7-GISEL: ; %bb.0: +; GFX7-GISEL-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX7-GISEL-NEXT: v_mov_b32_e32 v3, 0xc800 +; GFX7-GISEL-NEXT: v_mov_b32_e32 v4, 0x4200 +; GFX7-GISEL-NEXT: v_cmp_eq_u32_e32 vcc, v1, v2 +; GFX7-GISEL-NEXT: v_cndmask_b32_e32 v1, v4, v3, vcc +; GFX7-GISEL-NEXT: v_cvt_f32_f16_e32 v0, v0 +; GFX7-GISEL-NEXT: v_cvt_f32_f16_e32 v1, v1 +; GFX7-GISEL-NEXT: v_mul_f32_e32 v0, v0, v1 +; GFX7-GISEL-NEXT: v_cvt_f16_f32_e32 v0, v0 +; GFX7-GISEL-NEXT: s_setpc_b64 s[30:31] +; +; GFX9-SDAG-LABEL: fmul_select_f16_test6: +; GFX9-SDAG: ; %bb.0: +; GFX9-SDAG-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX9-SDAG-NEXT: v_mov_b32_e32 v3, 0x4200 +; GFX9-SDAG-NEXT: v_mov_b32_e32 v4, 0xc800 +; GFX9-SDAG-NEXT: v_cmp_eq_u32_e32 vcc, v1, v2 +; GFX9-SDAG-NEXT: v_cndmask_b32_e32 v1, v3, 
v4, vcc +; GFX9-SDAG-NEXT: v_mul_f16_e32 v0, v0, v1 +; GFX9-SDAG-NEXT: s_setpc_b64 s[30:31] +; +; GFX9-GISEL-LABEL: fmul_select_f16_test6: +; GFX9-GISEL: ; %bb.0: +; GFX9-GISEL-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX9-GISEL-NEXT: v_mov_b32_e32 v3, 0xc800 +; GFX9-GISEL-NEXT: v_mov_b32_e32 v4, 0x4200 +; GFX9-GISEL-NEXT: v_cmp_eq_u32_e32 vcc, v1, v2 +; GFX9-GISEL-NEXT: v_cndmask_b32_e32 v1, v4, v3, vcc +; GFX9-GISEL-NEXT: v_mul_f16_e32 v0, v0, v1 +; GFX9-GISEL-NEXT: s_setpc_b64 s[30:31] +; +; GFX10-SDAG-LABEL: fmul_select_f16_test6: +; GFX10-SDAG: ; %bb.0: +; GFX10-SDAG-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX10-SDAG-NEXT: v_mov_b32_e32 v3, 0xc800 +; GFX10-SDAG-NEXT: v_cmp_eq_u32_e32 vcc_lo, v1, v2 +; GFX10-SDAG-NEXT: v_cndmask_b32_e32 v1, 0x4200, v3, vcc_lo +; GFX10-SDAG-NEXT: v_mul_f16_e32 v0, v0, v1 +; GFX10-SDAG-NEXT: s_setpc_b64 s[30:31] +; +; GFX10-GISEL-LABEL: fmul_select_f16_test6: +; GFX10-GISEL: ; %bb.0: +; GFX10-GISEL-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX10-GISEL-NEXT: v_mov_b32_e32 v3, 0x4200 +; GFX10-GISEL-NEXT: v_cmp_eq_u32_e32 vcc_lo, v1, v2 +; GFX10-GISEL-NEXT: v_cndmask_b32_e64 v1, v3, 0xc800, vcc_lo +; GFX10-GISEL-NEXT: v_mul_f16_e32 v0, v0, v1 +; GFX10-GISEL-NEXT: s_setpc_b64 s[30:31] +; +; GFX11-SDAG-LABEL: fmul_select_f16_test6: +; GFX11-SDAG: ; %bb.0: +; GFX11-SDAG-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX11-SDAG-NEXT: v_mov_b32_e32 v3, 0xc800 +; GFX11-SDAG-NEXT: v_cmp_eq_u32_e32 vcc_lo, v1, v2 +; GFX11-SDAG-NEXT: s_delay_alu instid0(VALU_DEP_2) | instskip(NEXT) | instid1(VALU_DEP_1) +; GFX11-SDAG-NEXT: v_cndmask_b32_e32 v1, 0x4200, v3, vcc_lo +; GFX11-SDAG-NEXT: v_mul_f16_e32 v0, v0, v1 +; GFX11-SDAG-NEXT: s_setpc_b64 s[30:31] +; +; GFX11-GISEL-LABEL: fmul_select_f16_test6: +; GFX11-GISEL: ; %bb.0: +; GFX11-GISEL-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX11-GISEL-NEXT: v_mov_b32_e32 v3, 0x4200 +; GFX11-GISEL-NEXT: v_cmp_eq_u32_e32 vcc_lo, v1, v2 +; GFX11-GISEL-NEXT: s_delay_alu 
instid0(VALU_DEP_2) | instskip(NEXT) | instid1(VALU_DEP_1) +; GFX11-GISEL-NEXT: v_cndmask_b32_e64 v1, v3, 0xc800, vcc_lo +; GFX11-GISEL-NEXT: v_mul_f16_e32 v0, v0, v1 +; GFX11-GISEL-NEXT: s_setpc_b64 s[30:31] %bool = icmp eq i32 %bool.arg1, %bool.arg2 %y = select i1 %bool, half -8.000000e+00, half 3.000000e+00 %ldexp = fmul half %x, %y @@ -1497,45 +2144,87 @@ define half @fmul_select_f16_test6(half %x, i32 %bool.arg1, i32 %bool.arg2) { } define half @fmul_select_f16_test7(half %x, i32 %bool.arg1, i32 %bool.arg2) { -; GFX7-LABEL: fmul_select_f16_test7: -; GFX7: ; %bb.0: -; GFX7-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX7-NEXT: v_cvt_f16_f32_e32 v0, v0 -; GFX7-NEXT: v_mov_b32_e32 v3, 0x41000000 -; GFX7-NEXT: v_cmp_eq_u32_e32 vcc, v1, v2 -; GFX7-NEXT: v_cndmask_b32_e32 v1, -4.0, v3, vcc -; GFX7-NEXT: v_cvt_f32_f16_e32 v0, v0 -; GFX7-NEXT: v_mul_f32_e32 v0, v0, v1 -; GFX7-NEXT: s_setpc_b64 s[30:31] -; -; GFX9-LABEL: fmul_select_f16_test7: -; GFX9: ; %bb.0: -; GFX9-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX9-NEXT: v_mov_b32_e32 v3, 0xc400 -; GFX9-NEXT: v_mov_b32_e32 v4, 0x4800 -; GFX9-NEXT: v_cmp_eq_u32_e32 vcc, v1, v2 -; GFX9-NEXT: v_cndmask_b32_e32 v1, v3, v4, vcc -; GFX9-NEXT: v_mul_f16_e32 v0, v0, v1 -; GFX9-NEXT: s_setpc_b64 s[30:31] -; -; GFX1030-LABEL: fmul_select_f16_test7: -; GFX1030: ; %bb.0: -; GFX1030-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX1030-NEXT: v_mov_b32_e32 v3, 0x4800 -; GFX1030-NEXT: v_cmp_eq_u32_e32 vcc_lo, v1, v2 -; GFX1030-NEXT: v_cndmask_b32_e32 v1, 0xc400, v3, vcc_lo -; GFX1030-NEXT: v_mul_f16_e32 v0, v0, v1 -; GFX1030-NEXT: s_setpc_b64 s[30:31] -; -; GFX1100-LABEL: fmul_select_f16_test7: -; GFX1100: ; %bb.0: -; GFX1100-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX1100-NEXT: v_mov_b32_e32 v3, 0x4800 -; GFX1100-NEXT: v_cmp_eq_u32_e32 vcc_lo, v1, v2 -; GFX1100-NEXT: s_delay_alu instid0(VALU_DEP_2) | instskip(NEXT) | instid1(VALU_DEP_1) -; GFX1100-NEXT: v_cndmask_b32_e32 v1, 0xc400, v3, vcc_lo -; 
GFX1100-NEXT: v_mul_f16_e32 v0, v0, v1 -; GFX1100-NEXT: s_setpc_b64 s[30:31] +; GFX7-SDAG-LABEL: fmul_select_f16_test7: +; GFX7-SDAG: ; %bb.0: +; GFX7-SDAG-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX7-SDAG-NEXT: v_cvt_f16_f32_e32 v0, v0 +; GFX7-SDAG-NEXT: v_mov_b32_e32 v3, 0x41000000 +; GFX7-SDAG-NEXT: v_cmp_eq_u32_e32 vcc, v1, v2 +; GFX7-SDAG-NEXT: v_cndmask_b32_e32 v1, -4.0, v3, vcc +; GFX7-SDAG-NEXT: v_cvt_f32_f16_e32 v0, v0 +; GFX7-SDAG-NEXT: v_mul_f32_e32 v0, v0, v1 +; GFX7-SDAG-NEXT: s_setpc_b64 s[30:31] +; +; GFX7-GISEL-LABEL: fmul_select_f16_test7: +; GFX7-GISEL: ; %bb.0: +; GFX7-GISEL-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX7-GISEL-NEXT: v_mov_b32_e32 v3, 0x4800 +; GFX7-GISEL-NEXT: v_mov_b32_e32 v4, 0xc400 +; GFX7-GISEL-NEXT: v_cmp_eq_u32_e32 vcc, v1, v2 +; GFX7-GISEL-NEXT: v_cndmask_b32_e32 v1, v4, v3, vcc +; GFX7-GISEL-NEXT: v_cvt_f32_f16_e32 v0, v0 +; GFX7-GISEL-NEXT: v_cvt_f32_f16_e32 v1, v1 +; GFX7-GISEL-NEXT: v_mul_f32_e32 v0, v0, v1 +; GFX7-GISEL-NEXT: v_cvt_f16_f32_e32 v0, v0 +; GFX7-GISEL-NEXT: s_setpc_b64 s[30:31] +; +; GFX9-SDAG-LABEL: fmul_select_f16_test7: +; GFX9-SDAG: ; %bb.0: +; GFX9-SDAG-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX9-SDAG-NEXT: v_mov_b32_e32 v3, 0xc400 +; GFX9-SDAG-NEXT: v_mov_b32_e32 v4, 0x4800 +; GFX9-SDAG-NEXT: v_cmp_eq_u32_e32 vcc, v1, v2 +; GFX9-SDAG-NEXT: v_cndmask_b32_e32 v1, v3, v4, vcc +; GFX9-SDAG-NEXT: v_mul_f16_e32 v0, v0, v1 +; GFX9-SDAG-NEXT: s_setpc_b64 s[30:31] +; +; GFX9-GISEL-LABEL: fmul_select_f16_test7: +; GFX9-GISEL: ; %bb.0: +; GFX9-GISEL-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX9-GISEL-NEXT: v_mov_b32_e32 v3, 0x4800 +; GFX9-GISEL-NEXT: v_mov_b32_e32 v4, 0xc400 +; GFX9-GISEL-NEXT: v_cmp_eq_u32_e32 vcc, v1, v2 +; GFX9-GISEL-NEXT: v_cndmask_b32_e32 v1, v4, v3, vcc +; GFX9-GISEL-NEXT: v_mul_f16_e32 v0, v0, v1 +; GFX9-GISEL-NEXT: s_setpc_b64 s[30:31] +; +; GFX10-SDAG-LABEL: fmul_select_f16_test7: +; GFX10-SDAG: ; %bb.0: +; GFX10-SDAG-NEXT: s_waitcnt vmcnt(0) 
expcnt(0) lgkmcnt(0) +; GFX10-SDAG-NEXT: v_mov_b32_e32 v3, 0x4800 +; GFX10-SDAG-NEXT: v_cmp_eq_u32_e32 vcc_lo, v1, v2 +; GFX10-SDAG-NEXT: v_cndmask_b32_e32 v1, 0xc400, v3, vcc_lo +; GFX10-SDAG-NEXT: v_mul_f16_e32 v0, v0, v1 +; GFX10-SDAG-NEXT: s_setpc_b64 s[30:31] +; +; GFX10-GISEL-LABEL: fmul_select_f16_test7: +; GFX10-GISEL: ; %bb.0: +; GFX10-GISEL-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX10-GISEL-NEXT: v_mov_b32_e32 v3, 0xc400 +; GFX10-GISEL-NEXT: v_cmp_eq_u32_e32 vcc_lo, v1, v2 +; GFX10-GISEL-NEXT: v_cndmask_b32_e64 v1, v3, 0x4800, vcc_lo +; GFX10-GISEL-NEXT: v_mul_f16_e32 v0, v0, v1 +; GFX10-GISEL-NEXT: s_setpc_b64 s[30:31] +; +; GFX11-SDAG-LABEL: fmul_select_f16_test7: +; GFX11-SDAG: ; %bb.0: +; GFX11-SDAG-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX11-SDAG-NEXT: v_mov_b32_e32 v3, 0x4800 +; GFX11-SDAG-NEXT: v_cmp_eq_u32_e32 vcc_lo, v1, v2 +; GFX11-SDAG-NEXT: s_delay_alu instid0(VALU_DEP_2) | instskip(NEXT) | instid1(VALU_DEP_1) +; GFX11-SDAG-NEXT: v_cndmask_b32_e32 v1, 0xc400, v3, vcc_lo +; GFX11-SDAG-NEXT: v_mul_f16_e32 v0, v0, v1 +; GFX11-SDAG-NEXT: s_setpc_b64 s[30:31] +; +; GFX11-GISEL-LABEL: fmul_select_f16_test7: +; GFX11-GISEL: ; %bb.0: +; GFX11-GISEL-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX11-GISEL-NEXT: v_mov_b32_e32 v3, 0xc400 +; GFX11-GISEL-NEXT: v_cmp_eq_u32_e32 vcc_lo, v1, v2 +; GFX11-GISEL-NEXT: s_delay_alu instid0(VALU_DEP_2) | instskip(NEXT) | instid1(VALU_DEP_1) +; GFX11-GISEL-NEXT: v_cndmask_b32_e64 v1, v3, 0x4800, vcc_lo +; GFX11-GISEL-NEXT: v_mul_f16_e32 v0, v0, v1 +; GFX11-GISEL-NEXT: s_setpc_b64 s[30:31] %bool = icmp eq i32 %bool.arg1, %bool.arg2 %y = select i1 %bool, half 8.000000e+00, half -4.000000e+00 %ldexp = fmul half %x, %y @@ -1543,16 +2232,28 @@ define half @fmul_select_f16_test7(half %x, i32 %bool.arg1, i32 %bool.arg2) { } define half @fmul_select_f16_test8(half %x, i32 %bool.arg1, i32 %bool.arg2) { -; GFX7-LABEL: fmul_select_f16_test8: -; GFX7: ; %bb.0: -; GFX7-NEXT: s_waitcnt vmcnt(0) expcnt(0) 
lgkmcnt(0) -; GFX7-NEXT: v_cvt_f16_f32_e32 v0, v0 -; GFX7-NEXT: v_bfrev_b32_e32 v3, 1 -; GFX7-NEXT: v_cmp_eq_u32_e32 vcc, v1, v2 -; GFX7-NEXT: v_cndmask_b32_e32 v1, 0, v3, vcc -; GFX7-NEXT: v_cvt_f32_f16_e32 v0, v0 -; GFX7-NEXT: v_mul_f32_e32 v0, v0, v1 -; GFX7-NEXT: s_setpc_b64 s[30:31] +; GFX7-SDAG-LABEL: fmul_select_f16_test8: +; GFX7-SDAG: ; %bb.0: +; GFX7-SDAG-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX7-SDAG-NEXT: v_cvt_f16_f32_e32 v0, v0 +; GFX7-SDAG-NEXT: v_bfrev_b32_e32 v3, 1 +; GFX7-SDAG-NEXT: v_cmp_eq_u32_e32 vcc, v1, v2 +; GFX7-SDAG-NEXT: v_cndmask_b32_e32 v1, 0, v3, vcc +; GFX7-SDAG-NEXT: v_cvt_f32_f16_e32 v0, v0 +; GFX7-SDAG-NEXT: v_mul_f32_e32 v0, v0, v1 +; GFX7-SDAG-NEXT: s_setpc_b64 s[30:31] +; +; GFX7-GISEL-LABEL: fmul_select_f16_test8: +; GFX7-GISEL: ; %bb.0: +; GFX7-GISEL-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX7-GISEL-NEXT: v_mov_b32_e32 v3, 0x8000 +; GFX7-GISEL-NEXT: v_cmp_eq_u32_e32 vcc, v1, v2 +; GFX7-GISEL-NEXT: v_cndmask_b32_e32 v1, 0, v3, vcc +; GFX7-GISEL-NEXT: v_cvt_f32_f16_e32 v0, v0 +; GFX7-GISEL-NEXT: v_cvt_f32_f16_e32 v1, v1 +; GFX7-GISEL-NEXT: v_mul_f32_e32 v0, v0, v1 +; GFX7-GISEL-NEXT: v_cvt_f16_f32_e32 v0, v0 +; GFX7-GISEL-NEXT: s_setpc_b64 s[30:31] ; ; GFX9-LABEL: fmul_select_f16_test8: ; GFX9: ; %bb.0: @@ -1563,22 +2264,22 @@ define half @fmul_select_f16_test8(half %x, i32 %bool.arg1, i32 %bool.arg2) { ; GFX9-NEXT: v_mul_f16_e32 v0, v0, v1 ; GFX9-NEXT: s_setpc_b64 s[30:31] ; -; GFX1030-LABEL: fmul_select_f16_test8: -; GFX1030: ; %bb.0: -; GFX1030-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX1030-NEXT: v_cmp_eq_u32_e32 vcc_lo, v1, v2 -; GFX1030-NEXT: v_cndmask_b32_e64 v1, 0, 0x8000, vcc_lo -; GFX1030-NEXT: v_mul_f16_e32 v0, v0, v1 -; GFX1030-NEXT: s_setpc_b64 s[30:31] -; -; GFX1100-LABEL: fmul_select_f16_test8: -; GFX1100: ; %bb.0: -; GFX1100-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX1100-NEXT: v_cmp_eq_u32_e32 vcc_lo, v1, v2 -; GFX1100-NEXT: v_cndmask_b32_e64 v1, 0, 0x8000, vcc_lo -; 
GFX1100-NEXT: s_delay_alu instid0(VALU_DEP_1) -; GFX1100-NEXT: v_mul_f16_e32 v0, v0, v1 -; GFX1100-NEXT: s_setpc_b64 s[30:31] +; GFX10-LABEL: fmul_select_f16_test8: +; GFX10: ; %bb.0: +; GFX10-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX10-NEXT: v_cmp_eq_u32_e32 vcc_lo, v1, v2 +; GFX10-NEXT: v_cndmask_b32_e64 v1, 0, 0x8000, vcc_lo +; GFX10-NEXT: v_mul_f16_e32 v0, v0, v1 +; GFX10-NEXT: s_setpc_b64 s[30:31] +; +; GFX11-LABEL: fmul_select_f16_test8: +; GFX11: ; %bb.0: +; GFX11-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX11-NEXT: v_cmp_eq_u32_e32 vcc_lo, v1, v2 +; GFX11-NEXT: v_cndmask_b32_e64 v1, 0, 0x8000, vcc_lo +; GFX11-NEXT: s_delay_alu instid0(VALU_DEP_1) +; GFX11-NEXT: v_mul_f16_e32 v0, v0, v1 +; GFX11-NEXT: s_setpc_b64 s[30:31] %bool = icmp eq i32 %bool.arg1, %bool.arg2 %y = select i1 %bool, half -0.000000e+00, half 0.000000e+00 %ldexp = fmul half %x, %y @@ -1586,40 +2287,87 @@ define half @fmul_select_f16_test8(half %x, i32 %bool.arg1, i32 %bool.arg2) { } define half @fmul_select_f16_test9(half %x, i32 %bool.arg1, i32 %bool.arg2) { -; GFX7-LABEL: fmul_select_f16_test9: -; GFX7: ; %bb.0: -; GFX7-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX7-NEXT: v_cvt_f16_f32_e64 v0, -v0 -; GFX7-NEXT: v_cmp_eq_u32_e32 vcc, v1, v2 -; GFX7-NEXT: v_cndmask_b32_e64 v1, 5, 4, vcc -; GFX7-NEXT: v_cvt_f32_f16_e32 v0, v0 -; GFX7-NEXT: v_ldexp_f32_e32 v0, v0, v1 -; GFX7-NEXT: s_setpc_b64 s[30:31] -; -; GFX9-LABEL: fmul_select_f16_test9: -; GFX9: ; %bb.0: -; GFX9-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX9-NEXT: v_cmp_eq_u32_e32 vcc, v1, v2 -; GFX9-NEXT: v_cndmask_b32_e64 v1, 5, 4, vcc -; GFX9-NEXT: v_ldexp_f16_e64 v0, -v0, v1 -; GFX9-NEXT: s_setpc_b64 s[30:31] -; -; GFX1030-LABEL: fmul_select_f16_test9: -; GFX1030: ; %bb.0: -; GFX1030-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX1030-NEXT: v_cmp_eq_u32_e32 vcc_lo, v1, v2 -; GFX1030-NEXT: v_cndmask_b32_e64 v1, 5, 4, vcc_lo -; GFX1030-NEXT: v_ldexp_f16_e64 v0, -v0, v1 -; GFX1030-NEXT: s_setpc_b64 
s[30:31] -; -; GFX1100-LABEL: fmul_select_f16_test9: -; GFX1100: ; %bb.0: -; GFX1100-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX1100-NEXT: v_cmp_eq_u32_e32 vcc_lo, v1, v2 -; GFX1100-NEXT: v_cndmask_b32_e64 v1, 5, 4, vcc_lo -; GFX1100-NEXT: s_delay_alu instid0(VALU_DEP_1) -; GFX1100-NEXT: v_ldexp_f16_e64 v0, -v0, v1 -; GFX1100-NEXT: s_setpc_b64 s[30:31] +; GFX7-SDAG-LABEL: fmul_select_f16_test9: +; GFX7-SDAG: ; %bb.0: +; GFX7-SDAG-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX7-SDAG-NEXT: v_cvt_f16_f32_e64 v0, -v0 +; GFX7-SDAG-NEXT: v_cmp_eq_u32_e32 vcc, v1, v2 +; GFX7-SDAG-NEXT: v_cndmask_b32_e64 v1, 5, 4, vcc +; GFX7-SDAG-NEXT: v_cvt_f32_f16_e32 v0, v0 +; GFX7-SDAG-NEXT: v_ldexp_f32_e32 v0, v0, v1 +; GFX7-SDAG-NEXT: s_setpc_b64 s[30:31] +; +; GFX7-GISEL-LABEL: fmul_select_f16_test9: +; GFX7-GISEL: ; %bb.0: +; GFX7-GISEL-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX7-GISEL-NEXT: v_cvt_f32_f16_e64 v0, -v0 +; GFX7-GISEL-NEXT: v_cmp_eq_u32_e32 vcc, v1, v2 +; GFX7-GISEL-NEXT: v_cndmask_b32_e64 v1, 0, -1, vcc +; GFX7-GISEL-NEXT: v_add_i32_e32 v1, vcc, 5, v1 +; GFX7-GISEL-NEXT: v_ldexp_f32_e32 v0, v0, v1 +; GFX7-GISEL-NEXT: v_cvt_f16_f32_e32 v0, v0 +; GFX7-GISEL-NEXT: s_setpc_b64 s[30:31] +; +; GFX9-SDAG-LABEL: fmul_select_f16_test9: +; GFX9-SDAG: ; %bb.0: +; GFX9-SDAG-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX9-SDAG-NEXT: v_cmp_eq_u32_e32 vcc, v1, v2 +; GFX9-SDAG-NEXT: v_cndmask_b32_e64 v1, 5, 4, vcc +; GFX9-SDAG-NEXT: v_ldexp_f16_e64 v0, -v0, v1 +; GFX9-SDAG-NEXT: s_setpc_b64 s[30:31] +; +; GFX9-GISEL-LABEL: fmul_select_f16_test9: +; GFX9-GISEL: ; %bb.0: +; GFX9-GISEL-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX9-GISEL-NEXT: v_cmp_eq_u32_e32 vcc, v1, v2 +; GFX9-GISEL-NEXT: v_cndmask_b32_e64 v1, 0, -1, vcc +; GFX9-GISEL-NEXT: v_add_u32_e32 v1, 5, v1 +; GFX9-GISEL-NEXT: v_mov_b32_e32 v2, 0xffff8000 +; GFX9-GISEL-NEXT: v_mov_b32_e32 v3, 0x7fff +; GFX9-GISEL-NEXT: v_med3_i32 v1, v1, v2, v3 +; GFX9-GISEL-NEXT: v_ldexp_f16_e64 v0, -v0, 
v1 +; GFX9-GISEL-NEXT: s_setpc_b64 s[30:31] +; +; GFX10-SDAG-LABEL: fmul_select_f16_test9: +; GFX10-SDAG: ; %bb.0: +; GFX10-SDAG-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX10-SDAG-NEXT: v_cmp_eq_u32_e32 vcc_lo, v1, v2 +; GFX10-SDAG-NEXT: v_cndmask_b32_e64 v1, 5, 4, vcc_lo +; GFX10-SDAG-NEXT: v_ldexp_f16_e64 v0, -v0, v1 +; GFX10-SDAG-NEXT: s_setpc_b64 s[30:31] +; +; GFX10-GISEL-LABEL: fmul_select_f16_test9: +; GFX10-GISEL: ; %bb.0: +; GFX10-GISEL-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX10-GISEL-NEXT: v_cmp_eq_u32_e32 vcc_lo, v1, v2 +; GFX10-GISEL-NEXT: v_mov_b32_e32 v2, 0x7fff +; GFX10-GISEL-NEXT: v_cndmask_b32_e64 v1, 0, -1, vcc_lo +; GFX10-GISEL-NEXT: v_add_nc_u32_e32 v1, 5, v1 +; GFX10-GISEL-NEXT: v_med3_i32 v1, 0xffff8000, v1, v2 +; GFX10-GISEL-NEXT: v_ldexp_f16_e64 v0, -v0, v1 +; GFX10-GISEL-NEXT: s_setpc_b64 s[30:31] +; +; GFX11-SDAG-LABEL: fmul_select_f16_test9: +; GFX11-SDAG: ; %bb.0: +; GFX11-SDAG-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX11-SDAG-NEXT: v_cmp_eq_u32_e32 vcc_lo, v1, v2 +; GFX11-SDAG-NEXT: v_cndmask_b32_e64 v1, 5, 4, vcc_lo +; GFX11-SDAG-NEXT: s_delay_alu instid0(VALU_DEP_1) +; GFX11-SDAG-NEXT: v_ldexp_f16_e64 v0, -v0, v1 +; GFX11-SDAG-NEXT: s_setpc_b64 s[30:31] +; +; GFX11-GISEL-LABEL: fmul_select_f16_test9: +; GFX11-GISEL: ; %bb.0: +; GFX11-GISEL-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX11-GISEL-NEXT: v_cmp_eq_u32_e32 vcc_lo, v1, v2 +; GFX11-GISEL-NEXT: v_mov_b32_e32 v2, 0x7fff +; GFX11-GISEL-NEXT: v_cndmask_b32_e64 v1, 0, -1, vcc_lo +; GFX11-GISEL-NEXT: s_delay_alu instid0(VALU_DEP_1) | instskip(NEXT) | instid1(VALU_DEP_1) +; GFX11-GISEL-NEXT: v_add_nc_u32_e32 v1, 5, v1 +; GFX11-GISEL-NEXT: v_med3_i32 v1, 0xffff8000, v1, v2 +; GFX11-GISEL-NEXT: s_delay_alu instid0(VALU_DEP_1) +; GFX11-GISEL-NEXT: v_ldexp_f16_e64 v0, -v0, v1 +; GFX11-GISEL-NEXT: s_setpc_b64 s[30:31] %bool = icmp eq i32 %bool.arg1, %bool.arg2 %y = select i1 %bool, half -1.600000e+01, half -3.200000e+01 %ldexp = fmul half %x, %y @@ 
-1627,47 +2375,82 @@ define half @fmul_select_f16_test9(half %x, i32 %bool.arg1, i32 %bool.arg2) { } define half @fmul_select_f16_test10_sel_log2val_neg11_pos11(half %x, i32 %bool.arg1, i32 %bool.arg2) { -; GFX7-LABEL: fmul_select_f16_test10_sel_log2val_neg11_pos11: -; GFX7: ; %bb.0: -; GFX7-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX7-NEXT: v_cvt_f16_f32_e32 v0, v0 -; GFX7-NEXT: v_cmp_eq_u32_e32 vcc, v1, v2 -; GFX7-NEXT: v_cndmask_b32_e64 v1, 11, -11, vcc -; GFX7-NEXT: v_cvt_f32_f16_e32 v0, v0 -; GFX7-NEXT: v_ldexp_f32_e32 v0, v0, v1 -; GFX7-NEXT: s_setpc_b64 s[30:31] -; -; GFX9-LABEL: fmul_select_f16_test10_sel_log2val_neg11_pos11: -; GFX9: ; %bb.0: -; GFX9-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX9-NEXT: v_cmp_eq_u32_e32 vcc, v1, v2 -; GFX9-NEXT: v_cndmask_b32_e64 v1, 11, -11, vcc -; GFX9-NEXT: s_movk_i32 s4, 0x8000 -; GFX9-NEXT: v_mov_b32_e32 v2, 0x7fff -; GFX9-NEXT: v_med3_i32 v1, v1, s4, v2 -; GFX9-NEXT: v_ldexp_f16_e32 v0, v0, v1 -; GFX9-NEXT: s_setpc_b64 s[30:31] -; -; GFX1030-LABEL: fmul_select_f16_test10_sel_log2val_neg11_pos11: -; GFX1030: ; %bb.0: -; GFX1030-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX1030-NEXT: v_cmp_eq_u32_e32 vcc_lo, v1, v2 -; GFX1030-NEXT: s_movk_i32 s4, 0x8000 -; GFX1030-NEXT: v_cndmask_b32_e64 v1, 11, -11, vcc_lo -; GFX1030-NEXT: v_med3_i32 v1, v1, s4, 0x7fff -; GFX1030-NEXT: v_ldexp_f16_e32 v0, v0, v1 -; GFX1030-NEXT: s_setpc_b64 s[30:31] -; -; GFX1100-LABEL: fmul_select_f16_test10_sel_log2val_neg11_pos11: -; GFX1100: ; %bb.0: -; GFX1100-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX1100-NEXT: v_cmp_eq_u32_e32 vcc_lo, v1, v2 -; GFX1100-NEXT: s_movk_i32 s0, 0x8000 -; GFX1100-NEXT: v_cndmask_b32_e64 v1, 11, -11, vcc_lo -; GFX1100-NEXT: s_delay_alu instid0(VALU_DEP_1) | instskip(NEXT) | instid1(VALU_DEP_1) -; GFX1100-NEXT: v_med3_i32 v1, v1, s0, 0x7fff -; GFX1100-NEXT: v_ldexp_f16_e32 v0, v0, v1 -; GFX1100-NEXT: s_setpc_b64 s[30:31] +; GFX7-SDAG-LABEL: fmul_select_f16_test10_sel_log2val_neg11_pos11: 
+; GFX7-SDAG: ; %bb.0: +; GFX7-SDAG-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX7-SDAG-NEXT: v_cvt_f16_f32_e32 v0, v0 +; GFX7-SDAG-NEXT: v_cmp_eq_u32_e32 vcc, v1, v2 +; GFX7-SDAG-NEXT: v_cndmask_b32_e64 v1, 11, -11, vcc +; GFX7-SDAG-NEXT: v_cvt_f32_f16_e32 v0, v0 +; GFX7-SDAG-NEXT: v_ldexp_f32_e32 v0, v0, v1 +; GFX7-SDAG-NEXT: s_setpc_b64 s[30:31] +; +; GFX7-GISEL-LABEL: fmul_select_f16_test10_sel_log2val_neg11_pos11: +; GFX7-GISEL: ; %bb.0: +; GFX7-GISEL-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX7-GISEL-NEXT: v_cvt_f32_f16_e32 v0, v0 +; GFX7-GISEL-NEXT: v_cmp_eq_u32_e32 vcc, v1, v2 +; GFX7-GISEL-NEXT: v_cndmask_b32_e64 v1, 11, -11, vcc +; GFX7-GISEL-NEXT: v_ldexp_f32_e32 v0, v0, v1 +; GFX7-GISEL-NEXT: v_cvt_f16_f32_e32 v0, v0 +; GFX7-GISEL-NEXT: s_setpc_b64 s[30:31] +; +; GFX9-SDAG-LABEL: fmul_select_f16_test10_sel_log2val_neg11_pos11: +; GFX9-SDAG: ; %bb.0: +; GFX9-SDAG-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX9-SDAG-NEXT: v_cmp_eq_u32_e32 vcc, v1, v2 +; GFX9-SDAG-NEXT: v_cndmask_b32_e64 v1, 11, -11, vcc +; GFX9-SDAG-NEXT: s_movk_i32 s4, 0x8000 +; GFX9-SDAG-NEXT: v_mov_b32_e32 v2, 0x7fff +; GFX9-SDAG-NEXT: v_med3_i32 v1, v1, s4, v2 +; GFX9-SDAG-NEXT: v_ldexp_f16_e32 v0, v0, v1 +; GFX9-SDAG-NEXT: s_setpc_b64 s[30:31] +; +; GFX9-GISEL-LABEL: fmul_select_f16_test10_sel_log2val_neg11_pos11: +; GFX9-GISEL: ; %bb.0: +; GFX9-GISEL-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX9-GISEL-NEXT: v_cmp_eq_u32_e32 vcc, v1, v2 +; GFX9-GISEL-NEXT: v_cndmask_b32_e64 v1, 11, -11, vcc +; GFX9-GISEL-NEXT: v_ldexp_f16_e32 v0, v0, v1 +; GFX9-GISEL-NEXT: s_setpc_b64 s[30:31] +; +; GFX10-SDAG-LABEL: fmul_select_f16_test10_sel_log2val_neg11_pos11: +; GFX10-SDAG: ; %bb.0: +; GFX10-SDAG-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX10-SDAG-NEXT: v_cmp_eq_u32_e32 vcc_lo, v1, v2 +; GFX10-SDAG-NEXT: s_movk_i32 s4, 0x8000 +; GFX10-SDAG-NEXT: v_cndmask_b32_e64 v1, 11, -11, vcc_lo +; GFX10-SDAG-NEXT: v_med3_i32 v1, v1, s4, 0x7fff +; GFX10-SDAG-NEXT: 
v_ldexp_f16_e32 v0, v0, v1 +; GFX10-SDAG-NEXT: s_setpc_b64 s[30:31] +; +; GFX10-GISEL-LABEL: fmul_select_f16_test10_sel_log2val_neg11_pos11: +; GFX10-GISEL: ; %bb.0: +; GFX10-GISEL-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX10-GISEL-NEXT: v_cmp_eq_u32_e32 vcc_lo, v1, v2 +; GFX10-GISEL-NEXT: v_cndmask_b32_e64 v1, 11, -11, vcc_lo +; GFX10-GISEL-NEXT: v_ldexp_f16_e32 v0, v0, v1 +; GFX10-GISEL-NEXT: s_setpc_b64 s[30:31] +; +; GFX11-SDAG-LABEL: fmul_select_f16_test10_sel_log2val_neg11_pos11: +; GFX11-SDAG: ; %bb.0: +; GFX11-SDAG-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX11-SDAG-NEXT: v_cmp_eq_u32_e32 vcc_lo, v1, v2 +; GFX11-SDAG-NEXT: s_movk_i32 s0, 0x8000 +; GFX11-SDAG-NEXT: v_cndmask_b32_e64 v1, 11, -11, vcc_lo +; GFX11-SDAG-NEXT: s_delay_alu instid0(VALU_DEP_1) | instskip(NEXT) | instid1(VALU_DEP_1) +; GFX11-SDAG-NEXT: v_med3_i32 v1, v1, s0, 0x7fff +; GFX11-SDAG-NEXT: v_ldexp_f16_e32 v0, v0, v1 +; GFX11-SDAG-NEXT: s_setpc_b64 s[30:31] +; +; GFX11-GISEL-LABEL: fmul_select_f16_test10_sel_log2val_neg11_pos11: +; GFX11-GISEL: ; %bb.0: +; GFX11-GISEL-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX11-GISEL-NEXT: v_cmp_eq_u32_e32 vcc_lo, v1, v2 +; GFX11-GISEL-NEXT: v_cndmask_b32_e64 v1, 11, -11, vcc_lo +; GFX11-GISEL-NEXT: s_delay_alu instid0(VALU_DEP_1) +; GFX11-GISEL-NEXT: v_ldexp_f16_e32 v0, v0, v1 +; GFX11-GISEL-NEXT: s_setpc_b64 s[30:31] %bool = icmp eq i32 %bool.arg1, %bool.arg2 %y = select i1 %bool, half 0xH1000, half 0xH6800 %ldexp = fmul half %x, %y @@ -1675,47 +2458,82 @@ define half @fmul_select_f16_test10_sel_log2val_neg11_pos11(half %x, i32 %bool.a } define half @fmul_select_f16_test11_sel_log2val_pos7_neg14(half %x, i32 %bool.arg1, i32 %bool.arg2) { -; GFX7-LABEL: fmul_select_f16_test11_sel_log2val_pos7_neg14: -; GFX7: ; %bb.0: -; GFX7-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX7-NEXT: v_cvt_f16_f32_e32 v0, v0 -; GFX7-NEXT: v_cmp_eq_u32_e32 vcc, v1, v2 -; GFX7-NEXT: v_cndmask_b32_e64 v1, -14, 7, vcc -; GFX7-NEXT: 
v_cvt_f32_f16_e32 v0, v0 -; GFX7-NEXT: v_ldexp_f32_e32 v0, v0, v1 -; GFX7-NEXT: s_setpc_b64 s[30:31] -; -; GFX9-LABEL: fmul_select_f16_test11_sel_log2val_pos7_neg14: -; GFX9: ; %bb.0: -; GFX9-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX9-NEXT: v_cmp_eq_u32_e32 vcc, v1, v2 -; GFX9-NEXT: v_cndmask_b32_e64 v1, -14, 7, vcc -; GFX9-NEXT: s_movk_i32 s4, 0x8000 -; GFX9-NEXT: v_mov_b32_e32 v2, 0x7fff -; GFX9-NEXT: v_med3_i32 v1, v1, s4, v2 -; GFX9-NEXT: v_ldexp_f16_e32 v0, v0, v1 -; GFX9-NEXT: s_setpc_b64 s[30:31] -; -; GFX1030-LABEL: fmul_select_f16_test11_sel_log2val_pos7_neg14: -; GFX1030: ; %bb.0: -; GFX1030-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX1030-NEXT: v_cmp_eq_u32_e32 vcc_lo, v1, v2 -; GFX1030-NEXT: s_movk_i32 s4, 0x8000 -; GFX1030-NEXT: v_cndmask_b32_e64 v1, -14, 7, vcc_lo -; GFX1030-NEXT: v_med3_i32 v1, v1, s4, 0x7fff -; GFX1030-NEXT: v_ldexp_f16_e32 v0, v0, v1 -; GFX1030-NEXT: s_setpc_b64 s[30:31] -; -; GFX1100-LABEL: fmul_select_f16_test11_sel_log2val_pos7_neg14: -; GFX1100: ; %bb.0: -; GFX1100-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX1100-NEXT: v_cmp_eq_u32_e32 vcc_lo, v1, v2 -; GFX1100-NEXT: s_movk_i32 s0, 0x8000 -; GFX1100-NEXT: v_cndmask_b32_e64 v1, -14, 7, vcc_lo -; GFX1100-NEXT: s_delay_alu instid0(VALU_DEP_1) | instskip(NEXT) | instid1(VALU_DEP_1) -; GFX1100-NEXT: v_med3_i32 v1, v1, s0, 0x7fff -; GFX1100-NEXT: v_ldexp_f16_e32 v0, v0, v1 -; GFX1100-NEXT: s_setpc_b64 s[30:31] +; GFX7-SDAG-LABEL: fmul_select_f16_test11_sel_log2val_pos7_neg14: +; GFX7-SDAG: ; %bb.0: +; GFX7-SDAG-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX7-SDAG-NEXT: v_cvt_f16_f32_e32 v0, v0 +; GFX7-SDAG-NEXT: v_cmp_eq_u32_e32 vcc, v1, v2 +; GFX7-SDAG-NEXT: v_cndmask_b32_e64 v1, -14, 7, vcc +; GFX7-SDAG-NEXT: v_cvt_f32_f16_e32 v0, v0 +; GFX7-SDAG-NEXT: v_ldexp_f32_e32 v0, v0, v1 +; GFX7-SDAG-NEXT: s_setpc_b64 s[30:31] +; +; GFX7-GISEL-LABEL: fmul_select_f16_test11_sel_log2val_pos7_neg14: +; GFX7-GISEL: ; %bb.0: +; GFX7-GISEL-NEXT: s_waitcnt vmcnt(0) 
expcnt(0) lgkmcnt(0) +; GFX7-GISEL-NEXT: v_cvt_f32_f16_e32 v0, v0 +; GFX7-GISEL-NEXT: v_cmp_eq_u32_e32 vcc, v1, v2 +; GFX7-GISEL-NEXT: v_cndmask_b32_e64 v1, -14, 7, vcc +; GFX7-GISEL-NEXT: v_ldexp_f32_e32 v0, v0, v1 +; GFX7-GISEL-NEXT: v_cvt_f16_f32_e32 v0, v0 +; GFX7-GISEL-NEXT: s_setpc_b64 s[30:31] +; +; GFX9-SDAG-LABEL: fmul_select_f16_test11_sel_log2val_pos7_neg14: +; GFX9-SDAG: ; %bb.0: +; GFX9-SDAG-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX9-SDAG-NEXT: v_cmp_eq_u32_e32 vcc, v1, v2 +; GFX9-SDAG-NEXT: v_cndmask_b32_e64 v1, -14, 7, vcc +; GFX9-SDAG-NEXT: s_movk_i32 s4, 0x8000 +; GFX9-SDAG-NEXT: v_mov_b32_e32 v2, 0x7fff +; GFX9-SDAG-NEXT: v_med3_i32 v1, v1, s4, v2 +; GFX9-SDAG-NEXT: v_ldexp_f16_e32 v0, v0, v1 +; GFX9-SDAG-NEXT: s_setpc_b64 s[30:31] +; +; GFX9-GISEL-LABEL: fmul_select_f16_test11_sel_log2val_pos7_neg14: +; GFX9-GISEL: ; %bb.0: +; GFX9-GISEL-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX9-GISEL-NEXT: v_cmp_eq_u32_e32 vcc, v1, v2 +; GFX9-GISEL-NEXT: v_cndmask_b32_e64 v1, -14, 7, vcc +; GFX9-GISEL-NEXT: v_ldexp_f16_e32 v0, v0, v1 +; GFX9-GISEL-NEXT: s_setpc_b64 s[30:31] +; +; GFX10-SDAG-LABEL: fmul_select_f16_test11_sel_log2val_pos7_neg14: +; GFX10-SDAG: ; %bb.0: +; GFX10-SDAG-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX10-SDAG-NEXT: v_cmp_eq_u32_e32 vcc_lo, v1, v2 +; GFX10-SDAG-NEXT: s_movk_i32 s4, 0x8000 +; GFX10-SDAG-NEXT: v_cndmask_b32_e64 v1, -14, 7, vcc_lo +; GFX10-SDAG-NEXT: v_med3_i32 v1, v1, s4, 0x7fff +; GFX10-SDAG-NEXT: v_ldexp_f16_e32 v0, v0, v1 +; GFX10-SDAG-NEXT: s_setpc_b64 s[30:31] +; +; GFX10-GISEL-LABEL: fmul_select_f16_test11_sel_log2val_pos7_neg14: +; GFX10-GISEL: ; %bb.0: +; GFX10-GISEL-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX10-GISEL-NEXT: v_cmp_eq_u32_e32 vcc_lo, v1, v2 +; GFX10-GISEL-NEXT: v_cndmask_b32_e64 v1, -14, 7, vcc_lo +; GFX10-GISEL-NEXT: v_ldexp_f16_e32 v0, v0, v1 +; GFX10-GISEL-NEXT: s_setpc_b64 s[30:31] +; +; GFX11-SDAG-LABEL: fmul_select_f16_test11_sel_log2val_pos7_neg14: +; 
GFX11-SDAG: ; %bb.0: +; GFX11-SDAG-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX11-SDAG-NEXT: v_cmp_eq_u32_e32 vcc_lo, v1, v2 +; GFX11-SDAG-NEXT: s_movk_i32 s0, 0x8000 +; GFX11-SDAG-NEXT: v_cndmask_b32_e64 v1, -14, 7, vcc_lo +; GFX11-SDAG-NEXT: s_delay_alu instid0(VALU_DEP_1) | instskip(NEXT) | instid1(VALU_DEP_1) +; GFX11-SDAG-NEXT: v_med3_i32 v1, v1, s0, 0x7fff +; GFX11-SDAG-NEXT: v_ldexp_f16_e32 v0, v0, v1 +; GFX11-SDAG-NEXT: s_setpc_b64 s[30:31] +; +; GFX11-GISEL-LABEL: fmul_select_f16_test11_sel_log2val_pos7_neg14: +; GFX11-GISEL: ; %bb.0: +; GFX11-GISEL-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX11-GISEL-NEXT: v_cmp_eq_u32_e32 vcc_lo, v1, v2 +; GFX11-GISEL-NEXT: v_cndmask_b32_e64 v1, -14, 7, vcc_lo +; GFX11-GISEL-NEXT: s_delay_alu instid0(VALU_DEP_1) +; GFX11-GISEL-NEXT: v_ldexp_f16_e32 v0, v0, v1 +; GFX11-GISEL-NEXT: s_setpc_b64 s[30:31] %bool = icmp eq i32 %bool.arg1, %bool.arg2 %y = select i1 %bool, half 0xH5800, half 0xH0400 %ldexp = fmul half %x, %y @@ -1723,72 +2541,114 @@ define half @fmul_select_f16_test11_sel_log2val_pos7_neg14(half %x, i32 %bool.ar } define bfloat @fmul_select_bf16_test1(bfloat %x, i32 %bool.arg1, i32 %bool.arg2) { -; GFX7-LABEL: fmul_select_bf16_test1: -; GFX7: ; %bb.0: -; GFX7-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX7-NEXT: v_mul_f32_e32 v0, 1.0, v0 -; GFX7-NEXT: v_cmp_eq_u32_e32 vcc, v1, v2 -; GFX7-NEXT: v_cndmask_b32_e64 v1, 1.0, 2.0, vcc -; GFX7-NEXT: v_and_b32_e32 v0, 0xffff0000, v0 -; GFX7-NEXT: v_mul_f32_e32 v0, v0, v1 -; GFX7-NEXT: v_and_b32_e32 v0, 0xffff0000, v0 -; GFX7-NEXT: s_setpc_b64 s[30:31] -; -; GFX9-LABEL: fmul_select_bf16_test1: -; GFX9: ; %bb.0: -; GFX9-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX9-NEXT: v_mov_b32_e32 v3, 0x3f80 -; GFX9-NEXT: v_mov_b32_e32 v4, 0x4000 -; GFX9-NEXT: v_cmp_eq_u32_e32 vcc, v1, v2 -; GFX9-NEXT: v_cndmask_b32_e32 v1, v3, v4, vcc -; GFX9-NEXT: v_lshlrev_b32_e32 v1, 16, v1 -; GFX9-NEXT: v_lshlrev_b32_e32 v0, 16, v0 -; GFX9-NEXT: v_mul_f32_e32 v0, v0, 
v1 -; GFX9-NEXT: v_bfe_u32 v1, v0, 16, 1 -; GFX9-NEXT: s_movk_i32 s4, 0x7fff -; GFX9-NEXT: v_add3_u32 v1, v1, v0, s4 -; GFX9-NEXT: v_or_b32_e32 v2, 0x400000, v0 -; GFX9-NEXT: v_cmp_u_f32_e32 vcc, v0, v0 -; GFX9-NEXT: v_cndmask_b32_e32 v0, v1, v2, vcc -; GFX9-NEXT: v_lshrrev_b32_e32 v0, 16, v0 -; GFX9-NEXT: s_setpc_b64 s[30:31] -; -; GFX1030-LABEL: fmul_select_bf16_test1: -; GFX1030: ; %bb.0: -; GFX1030-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX1030-NEXT: v_mov_b32_e32 v3, 0x4000 -; GFX1030-NEXT: v_cmp_eq_u32_e32 vcc_lo, v1, v2 -; GFX1030-NEXT: v_lshlrev_b32_e32 v0, 16, v0 -; GFX1030-NEXT: v_cndmask_b32_e32 v1, 0x3f80, v3, vcc_lo -; GFX1030-NEXT: v_lshlrev_b32_e32 v1, 16, v1 -; GFX1030-NEXT: v_mul_f32_e32 v0, v0, v1 -; GFX1030-NEXT: v_bfe_u32 v1, v0, 16, 1 -; GFX1030-NEXT: v_or_b32_e32 v2, 0x400000, v0 -; GFX1030-NEXT: v_cmp_u_f32_e32 vcc_lo, v0, v0 -; GFX1030-NEXT: v_add3_u32 v1, v1, v0, 0x7fff -; GFX1030-NEXT: v_cndmask_b32_e32 v0, v1, v2, vcc_lo -; GFX1030-NEXT: v_lshrrev_b32_e32 v0, 16, v0 -; GFX1030-NEXT: s_setpc_b64 s[30:31] -; -; GFX1100-LABEL: fmul_select_bf16_test1: -; GFX1100: ; %bb.0: -; GFX1100-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX1100-NEXT: v_dual_mov_b32 v3, 0x4000 :: v_dual_lshlrev_b32 v0, 16, v0 -; GFX1100-NEXT: v_cmp_eq_u32_e32 vcc_lo, v1, v2 -; GFX1100-NEXT: s_delay_alu instid0(VALU_DEP_2) | instskip(NEXT) | instid1(VALU_DEP_1) -; GFX1100-NEXT: v_cndmask_b32_e32 v1, 0x3f80, v3, vcc_lo -; GFX1100-NEXT: v_lshlrev_b32_e32 v1, 16, v1 -; GFX1100-NEXT: s_delay_alu instid0(VALU_DEP_1) | instskip(NEXT) | instid1(VALU_DEP_1) -; GFX1100-NEXT: v_mul_f32_e32 v0, v0, v1 -; GFX1100-NEXT: v_bfe_u32 v1, v0, 16, 1 -; GFX1100-NEXT: v_or_b32_e32 v2, 0x400000, v0 -; GFX1100-NEXT: v_cmp_u_f32_e32 vcc_lo, v0, v0 -; GFX1100-NEXT: s_delay_alu instid0(VALU_DEP_3) | instskip(NEXT) | instid1(VALU_DEP_1) -; GFX1100-NEXT: v_add3_u32 v1, v1, v0, 0x7fff -; GFX1100-NEXT: v_cndmask_b32_e32 v0, v1, v2, vcc_lo -; GFX1100-NEXT: s_delay_alu instid0(VALU_DEP_1) 
-; GFX1100-NEXT: v_lshrrev_b32_e32 v0, 16, v0 -; GFX1100-NEXT: s_setpc_b64 s[30:31] +; GFX7-SDAG-LABEL: fmul_select_bf16_test1: +; GFX7-SDAG: ; %bb.0: +; GFX7-SDAG-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX7-SDAG-NEXT: v_mul_f32_e32 v0, 1.0, v0 +; GFX7-SDAG-NEXT: v_cmp_eq_u32_e32 vcc, v1, v2 +; GFX7-SDAG-NEXT: v_cndmask_b32_e64 v1, 1.0, 2.0, vcc +; GFX7-SDAG-NEXT: v_and_b32_e32 v0, 0xffff0000, v0 +; GFX7-SDAG-NEXT: v_mul_f32_e32 v0, v0, v1 +; GFX7-SDAG-NEXT: v_and_b32_e32 v0, 0xffff0000, v0 +; GFX7-SDAG-NEXT: s_setpc_b64 s[30:31] +; +; GFX7-GISEL-LABEL: fmul_select_bf16_test1: +; GFX7-GISEL: ; %bb.0: +; GFX7-GISEL-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX7-GISEL-NEXT: v_cvt_f32_f16_e32 v0, v0 +; GFX7-GISEL-NEXT: v_cmp_eq_u32_e32 vcc, v1, v2 +; GFX7-GISEL-NEXT: v_cndmask_b32_e64 v1, 0, 1, vcc +; GFX7-GISEL-NEXT: v_ldexp_f32_e32 v0, v0, v1 +; GFX7-GISEL-NEXT: v_cvt_f16_f32_e32 v0, v0 +; GFX7-GISEL-NEXT: s_setpc_b64 s[30:31] +; +; GFX9-SDAG-LABEL: fmul_select_bf16_test1: +; GFX9-SDAG: ; %bb.0: +; GFX9-SDAG-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX9-SDAG-NEXT: v_mov_b32_e32 v3, 0x3f80 +; GFX9-SDAG-NEXT: v_mov_b32_e32 v4, 0x4000 +; GFX9-SDAG-NEXT: v_cmp_eq_u32_e32 vcc, v1, v2 +; GFX9-SDAG-NEXT: v_cndmask_b32_e32 v1, v3, v4, vcc +; GFX9-SDAG-NEXT: v_lshlrev_b32_e32 v1, 16, v1 +; GFX9-SDAG-NEXT: v_lshlrev_b32_e32 v0, 16, v0 +; GFX9-SDAG-NEXT: v_mul_f32_e32 v0, v0, v1 +; GFX9-SDAG-NEXT: v_bfe_u32 v1, v0, 16, 1 +; GFX9-SDAG-NEXT: s_movk_i32 s4, 0x7fff +; GFX9-SDAG-NEXT: v_add3_u32 v1, v1, v0, s4 +; GFX9-SDAG-NEXT: v_or_b32_e32 v2, 0x400000, v0 +; GFX9-SDAG-NEXT: v_cmp_u_f32_e32 vcc, v0, v0 +; GFX9-SDAG-NEXT: v_cndmask_b32_e32 v0, v1, v2, vcc +; GFX9-SDAG-NEXT: v_lshrrev_b32_e32 v0, 16, v0 +; GFX9-SDAG-NEXT: s_setpc_b64 s[30:31] +; +; GFX9-GISEL-LABEL: fmul_select_bf16_test1: +; GFX9-GISEL: ; %bb.0: +; GFX9-GISEL-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX9-GISEL-NEXT: v_cmp_eq_u32_e32 vcc, v1, v2 +; GFX9-GISEL-NEXT: v_cndmask_b32_e64 
v1, 0, 1, vcc +; GFX9-GISEL-NEXT: v_mov_b32_e32 v2, 0xffff8000 +; GFX9-GISEL-NEXT: v_mov_b32_e32 v3, 0x7fff +; GFX9-GISEL-NEXT: v_med3_i32 v1, v1, v2, v3 +; GFX9-GISEL-NEXT: v_ldexp_f16_e32 v0, v0, v1 +; GFX9-GISEL-NEXT: s_setpc_b64 s[30:31] +; +; GFX10-SDAG-LABEL: fmul_select_bf16_test1: +; GFX10-SDAG: ; %bb.0: +; GFX10-SDAG-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX10-SDAG-NEXT: v_mov_b32_e32 v3, 0x4000 +; GFX10-SDAG-NEXT: v_cmp_eq_u32_e32 vcc_lo, v1, v2 +; GFX10-SDAG-NEXT: v_lshlrev_b32_e32 v0, 16, v0 +; GFX10-SDAG-NEXT: v_cndmask_b32_e32 v1, 0x3f80, v3, vcc_lo +; GFX10-SDAG-NEXT: v_lshlrev_b32_e32 v1, 16, v1 +; GFX10-SDAG-NEXT: v_mul_f32_e32 v0, v0, v1 +; GFX10-SDAG-NEXT: v_bfe_u32 v1, v0, 16, 1 +; GFX10-SDAG-NEXT: v_or_b32_e32 v2, 0x400000, v0 +; GFX10-SDAG-NEXT: v_cmp_u_f32_e32 vcc_lo, v0, v0 +; GFX10-SDAG-NEXT: v_add3_u32 v1, v1, v0, 0x7fff +; GFX10-SDAG-NEXT: v_cndmask_b32_e32 v0, v1, v2, vcc_lo +; GFX10-SDAG-NEXT: v_lshrrev_b32_e32 v0, 16, v0 +; GFX10-SDAG-NEXT: s_setpc_b64 s[30:31] +; +; GFX10-GISEL-LABEL: fmul_select_bf16_test1: +; GFX10-GISEL: ; %bb.0: +; GFX10-GISEL-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX10-GISEL-NEXT: v_cmp_eq_u32_e32 vcc_lo, v1, v2 +; GFX10-GISEL-NEXT: v_mov_b32_e32 v2, 0x7fff +; GFX10-GISEL-NEXT: v_cndmask_b32_e64 v1, 0, 1, vcc_lo +; GFX10-GISEL-NEXT: v_med3_i32 v1, 0xffff8000, v1, v2 +; GFX10-GISEL-NEXT: v_ldexp_f16_e32 v0, v0, v1 +; GFX10-GISEL-NEXT: s_setpc_b64 s[30:31] +; +; GFX11-SDAG-LABEL: fmul_select_bf16_test1: +; GFX11-SDAG: ; %bb.0: +; GFX11-SDAG-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX11-SDAG-NEXT: v_dual_mov_b32 v3, 0x4000 :: v_dual_lshlrev_b32 v0, 16, v0 +; GFX11-SDAG-NEXT: v_cmp_eq_u32_e32 vcc_lo, v1, v2 +; GFX11-SDAG-NEXT: s_delay_alu instid0(VALU_DEP_2) | instskip(NEXT) | instid1(VALU_DEP_1) +; GFX11-SDAG-NEXT: v_cndmask_b32_e32 v1, 0x3f80, v3, vcc_lo +; GFX11-SDAG-NEXT: v_lshlrev_b32_e32 v1, 16, v1 +; GFX11-SDAG-NEXT: s_delay_alu instid0(VALU_DEP_1) | instskip(NEXT) | 
instid1(VALU_DEP_1) +; GFX11-SDAG-NEXT: v_mul_f32_e32 v0, v0, v1 +; GFX11-SDAG-NEXT: v_bfe_u32 v1, v0, 16, 1 +; GFX11-SDAG-NEXT: v_or_b32_e32 v2, 0x400000, v0 +; GFX11-SDAG-NEXT: v_cmp_u_f32_e32 vcc_lo, v0, v0 +; GFX11-SDAG-NEXT: s_delay_alu instid0(VALU_DEP_3) | instskip(NEXT) | instid1(VALU_DEP_1) +; GFX11-SDAG-NEXT: v_add3_u32 v1, v1, v0, 0x7fff +; GFX11-SDAG-NEXT: v_cndmask_b32_e32 v0, v1, v2, vcc_lo +; GFX11-SDAG-NEXT: s_delay_alu instid0(VALU_DEP_1) +; GFX11-SDAG-NEXT: v_lshrrev_b32_e32 v0, 16, v0 +; GFX11-SDAG-NEXT: s_setpc_b64 s[30:31] +; +; GFX11-GISEL-LABEL: fmul_select_bf16_test1: +; GFX11-GISEL: ; %bb.0: +; GFX11-GISEL-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX11-GISEL-NEXT: v_cmp_eq_u32_e32 vcc_lo, v1, v2 +; GFX11-GISEL-NEXT: v_mov_b32_e32 v2, 0x7fff +; GFX11-GISEL-NEXT: v_cndmask_b32_e64 v1, 0, 1, vcc_lo +; GFX11-GISEL-NEXT: s_delay_alu instid0(VALU_DEP_1) | instskip(NEXT) | instid1(VALU_DEP_1) +; GFX11-GISEL-NEXT: v_med3_i32 v1, 0xffff8000, v1, v2 +; GFX11-GISEL-NEXT: v_ldexp_f16_e32 v0, v0, v1 +; GFX11-GISEL-NEXT: s_setpc_b64 s[30:31] %bool = icmp eq i32 %bool.arg1, %bool.arg2 %y = select i1 %bool, bfloat 2.000000e+00, bfloat 1.000000e+00 %ldexp = fmul bfloat %x, %y @@ -1796,72 +2656,114 @@ define bfloat @fmul_select_bf16_test1(bfloat %x, i32 %bool.arg1, i32 %bool.arg2) } define bfloat @fmul_select_bf16_test2(bfloat %x, i32 %bool.arg1, i32 %bool.arg2) { -; GFX7-LABEL: fmul_select_bf16_test2: -; GFX7: ; %bb.0: -; GFX7-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX7-NEXT: v_mul_f32_e32 v0, 1.0, v0 -; GFX7-NEXT: v_cmp_eq_u32_e32 vcc, v1, v2 -; GFX7-NEXT: v_cndmask_b32_e64 v1, 1.0, 0.5, vcc -; GFX7-NEXT: v_and_b32_e32 v0, 0xffff0000, v0 -; GFX7-NEXT: v_mul_f32_e32 v0, v0, v1 -; GFX7-NEXT: v_and_b32_e32 v0, 0xffff0000, v0 -; GFX7-NEXT: s_setpc_b64 s[30:31] -; -; GFX9-LABEL: fmul_select_bf16_test2: -; GFX9: ; %bb.0: -; GFX9-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX9-NEXT: v_mov_b32_e32 v3, 0x3f80 -; GFX9-NEXT: v_mov_b32_e32 v4, 
0x3f00 -; GFX9-NEXT: v_cmp_eq_u32_e32 vcc, v1, v2 -; GFX9-NEXT: v_cndmask_b32_e32 v1, v3, v4, vcc -; GFX9-NEXT: v_lshlrev_b32_e32 v1, 16, v1 -; GFX9-NEXT: v_lshlrev_b32_e32 v0, 16, v0 -; GFX9-NEXT: v_mul_f32_e32 v0, v0, v1 -; GFX9-NEXT: v_bfe_u32 v1, v0, 16, 1 -; GFX9-NEXT: s_movk_i32 s4, 0x7fff -; GFX9-NEXT: v_add3_u32 v1, v1, v0, s4 -; GFX9-NEXT: v_or_b32_e32 v2, 0x400000, v0 -; GFX9-NEXT: v_cmp_u_f32_e32 vcc, v0, v0 -; GFX9-NEXT: v_cndmask_b32_e32 v0, v1, v2, vcc -; GFX9-NEXT: v_lshrrev_b32_e32 v0, 16, v0 -; GFX9-NEXT: s_setpc_b64 s[30:31] -; -; GFX1030-LABEL: fmul_select_bf16_test2: -; GFX1030: ; %bb.0: -; GFX1030-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX1030-NEXT: v_mov_b32_e32 v3, 0x3f00 -; GFX1030-NEXT: v_cmp_eq_u32_e32 vcc_lo, v1, v2 -; GFX1030-NEXT: v_lshlrev_b32_e32 v0, 16, v0 -; GFX1030-NEXT: v_cndmask_b32_e32 v1, 0x3f80, v3, vcc_lo -; GFX1030-NEXT: v_lshlrev_b32_e32 v1, 16, v1 -; GFX1030-NEXT: v_mul_f32_e32 v0, v0, v1 -; GFX1030-NEXT: v_bfe_u32 v1, v0, 16, 1 -; GFX1030-NEXT: v_or_b32_e32 v2, 0x400000, v0 -; GFX1030-NEXT: v_cmp_u_f32_e32 vcc_lo, v0, v0 -; GFX1030-NEXT: v_add3_u32 v1, v1, v0, 0x7fff -; GFX1030-NEXT: v_cndmask_b32_e32 v0, v1, v2, vcc_lo -; GFX1030-NEXT: v_lshrrev_b32_e32 v0, 16, v0 -; GFX1030-NEXT: s_setpc_b64 s[30:31] -; -; GFX1100-LABEL: fmul_select_bf16_test2: -; GFX1100: ; %bb.0: -; GFX1100-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX1100-NEXT: v_dual_mov_b32 v3, 0x3f00 :: v_dual_lshlrev_b32 v0, 16, v0 -; GFX1100-NEXT: v_cmp_eq_u32_e32 vcc_lo, v1, v2 -; GFX1100-NEXT: s_delay_alu instid0(VALU_DEP_2) | instskip(NEXT) | instid1(VALU_DEP_1) -; GFX1100-NEXT: v_cndmask_b32_e32 v1, 0x3f80, v3, vcc_lo -; GFX1100-NEXT: v_lshlrev_b32_e32 v1, 16, v1 -; GFX1100-NEXT: s_delay_alu instid0(VALU_DEP_1) | instskip(NEXT) | instid1(VALU_DEP_1) -; GFX1100-NEXT: v_mul_f32_e32 v0, v0, v1 -; GFX1100-NEXT: v_bfe_u32 v1, v0, 16, 1 -; GFX1100-NEXT: v_or_b32_e32 v2, 0x400000, v0 -; GFX1100-NEXT: v_cmp_u_f32_e32 vcc_lo, v0, v0 -; GFX1100-NEXT: 
s_delay_alu instid0(VALU_DEP_3) | instskip(NEXT) | instid1(VALU_DEP_1) -; GFX1100-NEXT: v_add3_u32 v1, v1, v0, 0x7fff -; GFX1100-NEXT: v_cndmask_b32_e32 v0, v1, v2, vcc_lo -; GFX1100-NEXT: s_delay_alu instid0(VALU_DEP_1) -; GFX1100-NEXT: v_lshrrev_b32_e32 v0, 16, v0 -; GFX1100-NEXT: s_setpc_b64 s[30:31] +; GFX7-SDAG-LABEL: fmul_select_bf16_test2: +; GFX7-SDAG: ; %bb.0: +; GFX7-SDAG-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX7-SDAG-NEXT: v_mul_f32_e32 v0, 1.0, v0 +; GFX7-SDAG-NEXT: v_cmp_eq_u32_e32 vcc, v1, v2 +; GFX7-SDAG-NEXT: v_cndmask_b32_e64 v1, 1.0, 0.5, vcc +; GFX7-SDAG-NEXT: v_and_b32_e32 v0, 0xffff0000, v0 +; GFX7-SDAG-NEXT: v_mul_f32_e32 v0, v0, v1 +; GFX7-SDAG-NEXT: v_and_b32_e32 v0, 0xffff0000, v0 +; GFX7-SDAG-NEXT: s_setpc_b64 s[30:31] +; +; GFX7-GISEL-LABEL: fmul_select_bf16_test2: +; GFX7-GISEL: ; %bb.0: +; GFX7-GISEL-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX7-GISEL-NEXT: v_cvt_f32_f16_e32 v0, v0 +; GFX7-GISEL-NEXT: v_cmp_eq_u32_e32 vcc, v1, v2 +; GFX7-GISEL-NEXT: v_cndmask_b32_e64 v1, 0, -1, vcc +; GFX7-GISEL-NEXT: v_ldexp_f32_e32 v0, v0, v1 +; GFX7-GISEL-NEXT: v_cvt_f16_f32_e32 v0, v0 +; GFX7-GISEL-NEXT: s_setpc_b64 s[30:31] +; +; GFX9-SDAG-LABEL: fmul_select_bf16_test2: +; GFX9-SDAG: ; %bb.0: +; GFX9-SDAG-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX9-SDAG-NEXT: v_mov_b32_e32 v3, 0x3f80 +; GFX9-SDAG-NEXT: v_mov_b32_e32 v4, 0x3f00 +; GFX9-SDAG-NEXT: v_cmp_eq_u32_e32 vcc, v1, v2 +; GFX9-SDAG-NEXT: v_cndmask_b32_e32 v1, v3, v4, vcc +; GFX9-SDAG-NEXT: v_lshlrev_b32_e32 v1, 16, v1 +; GFX9-SDAG-NEXT: v_lshlrev_b32_e32 v0, 16, v0 +; GFX9-SDAG-NEXT: v_mul_f32_e32 v0, v0, v1 +; GFX9-SDAG-NEXT: v_bfe_u32 v1, v0, 16, 1 +; GFX9-SDAG-NEXT: s_movk_i32 s4, 0x7fff +; GFX9-SDAG-NEXT: v_add3_u32 v1, v1, v0, s4 +; GFX9-SDAG-NEXT: v_or_b32_e32 v2, 0x400000, v0 +; GFX9-SDAG-NEXT: v_cmp_u_f32_e32 vcc, v0, v0 +; GFX9-SDAG-NEXT: v_cndmask_b32_e32 v0, v1, v2, vcc +; GFX9-SDAG-NEXT: v_lshrrev_b32_e32 v0, 16, v0 +; GFX9-SDAG-NEXT: s_setpc_b64 
s[30:31] +; +; GFX9-GISEL-LABEL: fmul_select_bf16_test2: +; GFX9-GISEL: ; %bb.0: +; GFX9-GISEL-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX9-GISEL-NEXT: v_cmp_eq_u32_e32 vcc, v1, v2 +; GFX9-GISEL-NEXT: v_cndmask_b32_e64 v1, 0, -1, vcc +; GFX9-GISEL-NEXT: v_mov_b32_e32 v2, 0xffff8000 +; GFX9-GISEL-NEXT: v_mov_b32_e32 v3, 0x7fff +; GFX9-GISEL-NEXT: v_med3_i32 v1, v1, v2, v3 +; GFX9-GISEL-NEXT: v_ldexp_f16_e32 v0, v0, v1 +; GFX9-GISEL-NEXT: s_setpc_b64 s[30:31] +; +; GFX10-SDAG-LABEL: fmul_select_bf16_test2: +; GFX10-SDAG: ; %bb.0: +; GFX10-SDAG-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX10-SDAG-NEXT: v_mov_b32_e32 v3, 0x3f00 +; GFX10-SDAG-NEXT: v_cmp_eq_u32_e32 vcc_lo, v1, v2 +; GFX10-SDAG-NEXT: v_lshlrev_b32_e32 v0, 16, v0 +; GFX10-SDAG-NEXT: v_cndmask_b32_e32 v1, 0x3f80, v3, vcc_lo +; GFX10-SDAG-NEXT: v_lshlrev_b32_e32 v1, 16, v1 +; GFX10-SDAG-NEXT: v_mul_f32_e32 v0, v0, v1 +; GFX10-SDAG-NEXT: v_bfe_u32 v1, v0, 16, 1 +; GFX10-SDAG-NEXT: v_or_b32_e32 v2, 0x400000, v0 +; GFX10-SDAG-NEXT: v_cmp_u_f32_e32 vcc_lo, v0, v0 +; GFX10-SDAG-NEXT: v_add3_u32 v1, v1, v0, 0x7fff +; GFX10-SDAG-NEXT: v_cndmask_b32_e32 v0, v1, v2, vcc_lo +; GFX10-SDAG-NEXT: v_lshrrev_b32_e32 v0, 16, v0 +; GFX10-SDAG-NEXT: s_setpc_b64 s[30:31] +; +; GFX10-GISEL-LABEL: fmul_select_bf16_test2: +; GFX10-GISEL: ; %bb.0: +; GFX10-GISEL-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX10-GISEL-NEXT: v_cmp_eq_u32_e32 vcc_lo, v1, v2 +; GFX10-GISEL-NEXT: v_mov_b32_e32 v2, 0x7fff +; GFX10-GISEL-NEXT: v_cndmask_b32_e64 v1, 0, -1, vcc_lo +; GFX10-GISEL-NEXT: v_med3_i32 v1, 0xffff8000, v1, v2 +; GFX10-GISEL-NEXT: v_ldexp_f16_e32 v0, v0, v1 +; GFX10-GISEL-NEXT: s_setpc_b64 s[30:31] +; +; GFX11-SDAG-LABEL: fmul_select_bf16_test2: +; GFX11-SDAG: ; %bb.0: +; GFX11-SDAG-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX11-SDAG-NEXT: v_dual_mov_b32 v3, 0x3f00 :: v_dual_lshlrev_b32 v0, 16, v0 +; GFX11-SDAG-NEXT: v_cmp_eq_u32_e32 vcc_lo, v1, v2 +; GFX11-SDAG-NEXT: s_delay_alu instid0(VALU_DEP_2) | 
instskip(NEXT) | instid1(VALU_DEP_1) +; GFX11-SDAG-NEXT: v_cndmask_b32_e32 v1, 0x3f80, v3, vcc_lo +; GFX11-SDAG-NEXT: v_lshlrev_b32_e32 v1, 16, v1 +; GFX11-SDAG-NEXT: s_delay_alu instid0(VALU_DEP_1) | instskip(NEXT) | instid1(VALU_DEP_1) +; GFX11-SDAG-NEXT: v_mul_f32_e32 v0, v0, v1 +; GFX11-SDAG-NEXT: v_bfe_u32 v1, v0, 16, 1 +; GFX11-SDAG-NEXT: v_or_b32_e32 v2, 0x400000, v0 +; GFX11-SDAG-NEXT: v_cmp_u_f32_e32 vcc_lo, v0, v0 +; GFX11-SDAG-NEXT: s_delay_alu instid0(VALU_DEP_3) | instskip(NEXT) | instid1(VALU_DEP_1) +; GFX11-SDAG-NEXT: v_add3_u32 v1, v1, v0, 0x7fff +; GFX11-SDAG-NEXT: v_cndmask_b32_e32 v0, v1, v2, vcc_lo +; GFX11-SDAG-NEXT: s_delay_alu instid0(VALU_DEP_1) +; GFX11-SDAG-NEXT: v_lshrrev_b32_e32 v0, 16, v0 +; GFX11-SDAG-NEXT: s_setpc_b64 s[30:31] +; +; GFX11-GISEL-LABEL: fmul_select_bf16_test2: +; GFX11-GISEL: ; %bb.0: +; GFX11-GISEL-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX11-GISEL-NEXT: v_cmp_eq_u32_e32 vcc_lo, v1, v2 +; GFX11-GISEL-NEXT: v_mov_b32_e32 v2, 0x7fff +; GFX11-GISEL-NEXT: v_cndmask_b32_e64 v1, 0, -1, vcc_lo +; GFX11-GISEL-NEXT: s_delay_alu instid0(VALU_DEP_1) | instskip(NEXT) | instid1(VALU_DEP_1) +; GFX11-GISEL-NEXT: v_med3_i32 v1, 0xffff8000, v1, v2 +; GFX11-GISEL-NEXT: v_ldexp_f16_e32 v0, v0, v1 +; GFX11-GISEL-NEXT: s_setpc_b64 s[30:31] %bool = icmp eq i32 %bool.arg1, %bool.arg2 %y = select i1 %bool, bfloat 5.000000e-01, bfloat 1.000000e+00 %ldexp = fmul bfloat %x, %y @@ -1869,111 +2771,158 @@ define bfloat @fmul_select_bf16_test2(bfloat %x, i32 %bool.arg1, i32 %bool.arg2) } define <2 x bfloat> @fmul_select_v2bf16_test3(<2 x bfloat> %x, <2 x i32> %bool.arg1, <2 x i32> %bool.arg2) { -; GFX7-LABEL: fmul_select_v2bf16_test3: -; GFX7: ; %bb.0: -; GFX7-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX7-NEXT: v_cmp_eq_u32_e32 vcc, v2, v4 -; GFX7-NEXT: v_mul_f32_e32 v0, 1.0, v0 -; GFX7-NEXT: v_mul_f32_e32 v1, 1.0, v1 -; GFX7-NEXT: v_cndmask_b32_e64 v2, 1.0, 2.0, vcc -; GFX7-NEXT: v_cmp_eq_u32_e32 vcc, v3, v5 -; GFX7-NEXT: 
v_cndmask_b32_e64 v3, 1.0, 2.0, vcc -; GFX7-NEXT: v_and_b32_e32 v1, 0xffff0000, v1 -; GFX7-NEXT: v_and_b32_e32 v0, 0xffff0000, v0 -; GFX7-NEXT: v_mul_f32_e32 v1, v1, v3 -; GFX7-NEXT: v_mul_f32_e32 v0, v0, v2 -; GFX7-NEXT: v_and_b32_e32 v0, 0xffff0000, v0 -; GFX7-NEXT: v_and_b32_e32 v1, 0xffff0000, v1 -; GFX7-NEXT: s_setpc_b64 s[30:31] -; -; GFX9-LABEL: fmul_select_v2bf16_test3: -; GFX9: ; %bb.0: -; GFX9-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX9-NEXT: v_mov_b32_e32 v5, 0x3f80 -; GFX9-NEXT: v_mov_b32_e32 v6, 0x4000 -; GFX9-NEXT: v_cmp_eq_u32_e32 vcc, v2, v4 -; GFX9-NEXT: v_cndmask_b32_e32 v2, v5, v6, vcc -; GFX9-NEXT: v_cmp_eq_u32_e32 vcc, v1, v3 -; GFX9-NEXT: v_cndmask_b32_e32 v1, v5, v6, vcc -; GFX9-NEXT: v_lshlrev_b32_e32 v3, 16, v0 -; GFX9-NEXT: v_lshlrev_b32_e32 v1, 16, v1 -; GFX9-NEXT: v_mul_f32_e32 v1, v3, v1 -; GFX9-NEXT: v_and_b32_e32 v0, 0xffff0000, v0 -; GFX9-NEXT: v_lshlrev_b32_e32 v2, 16, v2 -; GFX9-NEXT: v_bfe_u32 v3, v1, 16, 1 -; GFX9-NEXT: s_movk_i32 s4, 0x7fff -; GFX9-NEXT: v_mul_f32_e32 v0, v0, v2 -; GFX9-NEXT: v_add3_u32 v3, v3, v1, s4 -; GFX9-NEXT: v_or_b32_e32 v4, 0x400000, v1 -; GFX9-NEXT: v_cmp_u_f32_e32 vcc, v1, v1 -; GFX9-NEXT: v_bfe_u32 v2, v0, 16, 1 -; GFX9-NEXT: v_cndmask_b32_e32 v1, v3, v4, vcc -; GFX9-NEXT: v_add3_u32 v2, v2, v0, s4 -; GFX9-NEXT: v_or_b32_e32 v3, 0x400000, v0 -; GFX9-NEXT: v_cmp_u_f32_e32 vcc, v0, v0 -; GFX9-NEXT: v_cndmask_b32_e32 v0, v2, v3, vcc -; GFX9-NEXT: s_mov_b32 s4, 0x7060302 -; GFX9-NEXT: v_perm_b32 v0, v0, v1, s4 -; GFX9-NEXT: s_setpc_b64 s[30:31] -; -; GFX1030-LABEL: fmul_select_v2bf16_test3: -; GFX1030: ; %bb.0: -; GFX1030-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX1030-NEXT: v_mov_b32_e32 v5, 0x4000 -; GFX1030-NEXT: v_cmp_eq_u32_e32 vcc_lo, v1, v3 -; GFX1030-NEXT: v_lshlrev_b32_e32 v3, 16, v0 -; GFX1030-NEXT: v_and_b32_e32 v0, 0xffff0000, v0 -; GFX1030-NEXT: v_cndmask_b32_e32 v1, 0x3f80, v5, vcc_lo -; GFX1030-NEXT: v_cmp_eq_u32_e32 vcc_lo, v2, v4 -; GFX1030-NEXT: v_lshlrev_b32_e32 v1, 
16, v1 -; GFX1030-NEXT: v_cndmask_b32_e32 v2, 0x3f80, v5, vcc_lo -; GFX1030-NEXT: v_mul_f32_e32 v1, v3, v1 -; GFX1030-NEXT: v_lshlrev_b32_e32 v2, 16, v2 -; GFX1030-NEXT: v_or_b32_e32 v4, 0x400000, v1 -; GFX1030-NEXT: v_mul_f32_e32 v0, v0, v2 -; GFX1030-NEXT: v_bfe_u32 v2, v1, 16, 1 -; GFX1030-NEXT: v_cmp_u_f32_e32 vcc_lo, v1, v1 -; GFX1030-NEXT: v_bfe_u32 v3, v0, 16, 1 -; GFX1030-NEXT: v_add3_u32 v2, v2, v1, 0x7fff -; GFX1030-NEXT: v_or_b32_e32 v5, 0x400000, v0 -; GFX1030-NEXT: v_add3_u32 v3, v3, v0, 0x7fff -; GFX1030-NEXT: v_cndmask_b32_e32 v1, v2, v4, vcc_lo -; GFX1030-NEXT: v_cmp_u_f32_e32 vcc_lo, v0, v0 -; GFX1030-NEXT: v_cndmask_b32_e32 v0, v3, v5, vcc_lo -; GFX1030-NEXT: v_perm_b32 v0, v0, v1, 0x7060302 -; GFX1030-NEXT: s_setpc_b64 s[30:31] -; -; GFX1100-LABEL: fmul_select_v2bf16_test3: -; GFX1100: ; %bb.0: -; GFX1100-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX1100-NEXT: v_mov_b32_e32 v5, 0x4000 -; GFX1100-NEXT: v_cmp_eq_u32_e32 vcc_lo, v1, v3 -; GFX1100-NEXT: v_lshlrev_b32_e32 v3, 16, v0 -; GFX1100-NEXT: v_and_b32_e32 v0, 0xffff0000, v0 -; GFX1100-NEXT: s_delay_alu instid0(VALU_DEP_4) | instskip(SKIP_2) | instid1(VALU_DEP_1) -; GFX1100-NEXT: v_cndmask_b32_e32 v1, 0x3f80, v5, vcc_lo -; GFX1100-NEXT: v_cmp_eq_u32_e32 vcc_lo, v2, v4 -; GFX1100-NEXT: v_cndmask_b32_e32 v2, 0x3f80, v5, vcc_lo -; GFX1100-NEXT: v_lshlrev_b32_e32 v2, 16, v2 -; GFX1100-NEXT: s_delay_alu instid0(VALU_DEP_1) | instskip(NEXT) | instid1(VALU_DEP_1) -; GFX1100-NEXT: v_dual_mul_f32 v0, v0, v2 :: v_dual_lshlrev_b32 v1, 16, v1 -; GFX1100-NEXT: v_or_b32_e32 v5, 0x400000, v0 -; GFX1100-NEXT: s_delay_alu instid0(VALU_DEP_2) | instskip(SKIP_1) | instid1(VALU_DEP_2) -; GFX1100-NEXT: v_mul_f32_e32 v1, v3, v1 -; GFX1100-NEXT: v_bfe_u32 v3, v0, 16, 1 -; GFX1100-NEXT: v_bfe_u32 v2, v1, 16, 1 -; GFX1100-NEXT: v_or_b32_e32 v4, 0x400000, v1 -; GFX1100-NEXT: v_cmp_u_f32_e32 vcc_lo, v1, v1 -; GFX1100-NEXT: s_delay_alu instid0(VALU_DEP_4) | instskip(NEXT) | instid1(VALU_DEP_4) -; GFX1100-NEXT: 
v_add3_u32 v3, v3, v0, 0x7fff -; GFX1100-NEXT: v_add3_u32 v2, v2, v1, 0x7fff -; GFX1100-NEXT: s_delay_alu instid0(VALU_DEP_1) | instskip(SKIP_1) | instid1(VALU_DEP_4) -; GFX1100-NEXT: v_cndmask_b32_e32 v1, v2, v4, vcc_lo -; GFX1100-NEXT: v_cmp_u_f32_e32 vcc_lo, v0, v0 -; GFX1100-NEXT: v_cndmask_b32_e32 v0, v3, v5, vcc_lo -; GFX1100-NEXT: s_delay_alu instid0(VALU_DEP_1) -; GFX1100-NEXT: v_perm_b32 v0, v0, v1, 0x7060302 -; GFX1100-NEXT: s_setpc_b64 s[30:31] +; GFX7-SDAG-LABEL: fmul_select_v2bf16_test3: +; GFX7-SDAG: ; %bb.0: +; GFX7-SDAG-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX7-SDAG-NEXT: v_cmp_eq_u32_e32 vcc, v2, v4 +; GFX7-SDAG-NEXT: v_mul_f32_e32 v0, 1.0, v0 +; GFX7-SDAG-NEXT: v_mul_f32_e32 v1, 1.0, v1 +; GFX7-SDAG-NEXT: v_cndmask_b32_e64 v2, 1.0, 2.0, vcc +; GFX7-SDAG-NEXT: v_cmp_eq_u32_e32 vcc, v3, v5 +; GFX7-SDAG-NEXT: v_cndmask_b32_e64 v3, 1.0, 2.0, vcc +; GFX7-SDAG-NEXT: v_and_b32_e32 v1, 0xffff0000, v1 +; GFX7-SDAG-NEXT: v_and_b32_e32 v0, 0xffff0000, v0 +; GFX7-SDAG-NEXT: v_mul_f32_e32 v1, v1, v3 +; GFX7-SDAG-NEXT: v_mul_f32_e32 v0, v0, v2 +; GFX7-SDAG-NEXT: v_and_b32_e32 v0, 0xffff0000, v0 +; GFX7-SDAG-NEXT: v_and_b32_e32 v1, 0xffff0000, v1 +; GFX7-SDAG-NEXT: s_setpc_b64 s[30:31] +; +; GFX7-GISEL-LABEL: fmul_select_v2bf16_test3: +; GFX7-GISEL: ; %bb.0: +; GFX7-GISEL-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX7-GISEL-NEXT: v_cvt_f32_f16_e32 v0, v0 +; GFX7-GISEL-NEXT: v_cvt_f32_f16_e32 v1, v1 +; GFX7-GISEL-NEXT: v_cmp_eq_u32_e32 vcc, v2, v4 +; GFX7-GISEL-NEXT: v_cndmask_b32_e64 v2, 0, 1, vcc +; GFX7-GISEL-NEXT: v_cmp_eq_u32_e32 vcc, v3, v5 +; GFX7-GISEL-NEXT: v_cndmask_b32_e64 v3, 0, 1, vcc +; GFX7-GISEL-NEXT: v_ldexp_f32_e32 v0, v0, v2 +; GFX7-GISEL-NEXT: v_ldexp_f32_e32 v1, v1, v3 +; GFX7-GISEL-NEXT: v_cvt_f16_f32_e32 v0, v0 +; GFX7-GISEL-NEXT: v_cvt_f16_f32_e32 v1, v1 +; GFX7-GISEL-NEXT: s_setpc_b64 s[30:31] +; +; GFX9-SDAG-LABEL: fmul_select_v2bf16_test3: +; GFX9-SDAG: ; %bb.0: +; GFX9-SDAG-NEXT: s_waitcnt vmcnt(0) expcnt(0) 
lgkmcnt(0) +; GFX9-SDAG-NEXT: v_mov_b32_e32 v5, 0x3f80 +; GFX9-SDAG-NEXT: v_mov_b32_e32 v6, 0x4000 +; GFX9-SDAG-NEXT: v_cmp_eq_u32_e32 vcc, v2, v4 +; GFX9-SDAG-NEXT: v_cndmask_b32_e32 v2, v5, v6, vcc +; GFX9-SDAG-NEXT: v_cmp_eq_u32_e32 vcc, v1, v3 +; GFX9-SDAG-NEXT: v_cndmask_b32_e32 v1, v5, v6, vcc +; GFX9-SDAG-NEXT: v_lshlrev_b32_e32 v3, 16, v0 +; GFX9-SDAG-NEXT: v_lshlrev_b32_e32 v1, 16, v1 +; GFX9-SDAG-NEXT: v_mul_f32_e32 v1, v3, v1 +; GFX9-SDAG-NEXT: v_and_b32_e32 v0, 0xffff0000, v0 +; GFX9-SDAG-NEXT: v_lshlrev_b32_e32 v2, 16, v2 +; GFX9-SDAG-NEXT: v_bfe_u32 v3, v1, 16, 1 +; GFX9-SDAG-NEXT: s_movk_i32 s4, 0x7fff +; GFX9-SDAG-NEXT: v_mul_f32_e32 v0, v0, v2 +; GFX9-SDAG-NEXT: v_add3_u32 v3, v3, v1, s4 +; GFX9-SDAG-NEXT: v_or_b32_e32 v4, 0x400000, v1 +; GFX9-SDAG-NEXT: v_cmp_u_f32_e32 vcc, v1, v1 +; GFX9-SDAG-NEXT: v_bfe_u32 v2, v0, 16, 1 +; GFX9-SDAG-NEXT: v_cndmask_b32_e32 v1, v3, v4, vcc +; GFX9-SDAG-NEXT: v_add3_u32 v2, v2, v0, s4 +; GFX9-SDAG-NEXT: v_or_b32_e32 v3, 0x400000, v0 +; GFX9-SDAG-NEXT: v_cmp_u_f32_e32 vcc, v0, v0 +; GFX9-SDAG-NEXT: v_cndmask_b32_e32 v0, v2, v3, vcc +; GFX9-SDAG-NEXT: s_mov_b32 s4, 0x7060302 +; GFX9-SDAG-NEXT: v_perm_b32 v0, v0, v1, s4 +; GFX9-SDAG-NEXT: s_setpc_b64 s[30:31] +; +; GFX9-GISEL-LABEL: fmul_select_v2bf16_test3: +; GFX9-GISEL: ; %bb.0: +; GFX9-GISEL-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX9-GISEL-NEXT: v_cmp_eq_u32_e32 vcc, v1, v3 +; GFX9-GISEL-NEXT: v_cndmask_b32_e64 v1, 0, 1, vcc +; GFX9-GISEL-NEXT: v_mov_b32_e32 v2, 0xffff8000 +; GFX9-GISEL-NEXT: v_mov_b32_e32 v3, 0x7fff +; GFX9-GISEL-NEXT: v_med3_i32 v1, v1, v2, v3 +; GFX9-GISEL-NEXT: v_ldexp_f16_e32 v0, v0, v1 +; GFX9-GISEL-NEXT: s_setpc_b64 s[30:31] +; +; GFX10-SDAG-LABEL: fmul_select_v2bf16_test3: +; GFX10-SDAG: ; %bb.0: +; GFX10-SDAG-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX10-SDAG-NEXT: v_mov_b32_e32 v5, 0x4000 +; GFX10-SDAG-NEXT: v_cmp_eq_u32_e32 vcc_lo, v1, v3 +; GFX10-SDAG-NEXT: v_lshlrev_b32_e32 v3, 16, v0 +; GFX10-SDAG-NEXT: 
v_and_b32_e32 v0, 0xffff0000, v0 +; GFX10-SDAG-NEXT: v_cndmask_b32_e32 v1, 0x3f80, v5, vcc_lo +; GFX10-SDAG-NEXT: v_cmp_eq_u32_e32 vcc_lo, v2, v4 +; GFX10-SDAG-NEXT: v_lshlrev_b32_e32 v1, 16, v1 +; GFX10-SDAG-NEXT: v_cndmask_b32_e32 v2, 0x3f80, v5, vcc_lo +; GFX10-SDAG-NEXT: v_mul_f32_e32 v1, v3, v1 +; GFX10-SDAG-NEXT: v_lshlrev_b32_e32 v2, 16, v2 +; GFX10-SDAG-NEXT: v_or_b32_e32 v4, 0x400000, v1 +; GFX10-SDAG-NEXT: v_mul_f32_e32 v0, v0, v2 +; GFX10-SDAG-NEXT: v_bfe_u32 v2, v1, 16, 1 +; GFX10-SDAG-NEXT: v_cmp_u_f32_e32 vcc_lo, v1, v1 +; GFX10-SDAG-NEXT: v_bfe_u32 v3, v0, 16, 1 +; GFX10-SDAG-NEXT: v_add3_u32 v2, v2, v1, 0x7fff +; GFX10-SDAG-NEXT: v_or_b32_e32 v5, 0x400000, v0 +; GFX10-SDAG-NEXT: v_add3_u32 v3, v3, v0, 0x7fff +; GFX10-SDAG-NEXT: v_cndmask_b32_e32 v1, v2, v4, vcc_lo +; GFX10-SDAG-NEXT: v_cmp_u_f32_e32 vcc_lo, v0, v0 +; GFX10-SDAG-NEXT: v_cndmask_b32_e32 v0, v3, v5, vcc_lo +; GFX10-SDAG-NEXT: v_perm_b32 v0, v0, v1, 0x7060302 +; GFX10-SDAG-NEXT: s_setpc_b64 s[30:31] +; +; GFX10-GISEL-LABEL: fmul_select_v2bf16_test3: +; GFX10-GISEL: ; %bb.0: +; GFX10-GISEL-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX10-GISEL-NEXT: v_cmp_eq_u32_e32 vcc_lo, v1, v3 +; GFX10-GISEL-NEXT: v_mov_b32_e32 v2, 0x7fff +; GFX10-GISEL-NEXT: v_cndmask_b32_e64 v1, 0, 1, vcc_lo +; GFX10-GISEL-NEXT: v_med3_i32 v1, 0xffff8000, v1, v2 +; GFX10-GISEL-NEXT: v_ldexp_f16_e32 v0, v0, v1 +; GFX10-GISEL-NEXT: s_setpc_b64 s[30:31] +; +; GFX11-SDAG-LABEL: fmul_select_v2bf16_test3: +; GFX11-SDAG: ; %bb.0: +; GFX11-SDAG-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX11-SDAG-NEXT: v_mov_b32_e32 v5, 0x4000 +; GFX11-SDAG-NEXT: v_cmp_eq_u32_e32 vcc_lo, v1, v3 +; GFX11-SDAG-NEXT: v_lshlrev_b32_e32 v3, 16, v0 +; GFX11-SDAG-NEXT: v_and_b32_e32 v0, 0xffff0000, v0 +; GFX11-SDAG-NEXT: s_delay_alu instid0(VALU_DEP_4) | instskip(SKIP_2) | instid1(VALU_DEP_1) +; GFX11-SDAG-NEXT: v_cndmask_b32_e32 v1, 0x3f80, v5, vcc_lo +; GFX11-SDAG-NEXT: v_cmp_eq_u32_e32 vcc_lo, v2, v4 +; GFX11-SDAG-NEXT: 
v_cndmask_b32_e32 v2, 0x3f80, v5, vcc_lo +; GFX11-SDAG-NEXT: v_lshlrev_b32_e32 v2, 16, v2 +; GFX11-SDAG-NEXT: s_delay_alu instid0(VALU_DEP_1) | instskip(NEXT) | instid1(VALU_DEP_1) +; GFX11-SDAG-NEXT: v_dual_mul_f32 v0, v0, v2 :: v_dual_lshlrev_b32 v1, 16, v1 +; GFX11-SDAG-NEXT: v_or_b32_e32 v5, 0x400000, v0 +; GFX11-SDAG-NEXT: s_delay_alu instid0(VALU_DEP_2) | instskip(SKIP_1) | instid1(VALU_DEP_2) +; GFX11-SDAG-NEXT: v_mul_f32_e32 v1, v3, v1 +; GFX11-SDAG-NEXT: v_bfe_u32 v3, v0, 16, 1 +; GFX11-SDAG-NEXT: v_bfe_u32 v2, v1, 16, 1 +; GFX11-SDAG-NEXT: v_or_b32_e32 v4, 0x400000, v1 +; GFX11-SDAG-NEXT: v_cmp_u_f32_e32 vcc_lo, v1, v1 +; GFX11-SDAG-NEXT: s_delay_alu instid0(VALU_DEP_4) | instskip(NEXT) | instid1(VALU_DEP_4) +; GFX11-SDAG-NEXT: v_add3_u32 v3, v3, v0, 0x7fff +; GFX11-SDAG-NEXT: v_add3_u32 v2, v2, v1, 0x7fff +; GFX11-SDAG-NEXT: s_delay_alu instid0(VALU_DEP_1) | instskip(SKIP_1) | instid1(VALU_DEP_4) +; GFX11-SDAG-NEXT: v_cndmask_b32_e32 v1, v2, v4, vcc_lo +; GFX11-SDAG-NEXT: v_cmp_u_f32_e32 vcc_lo, v0, v0 +; GFX11-SDAG-NEXT: v_cndmask_b32_e32 v0, v3, v5, vcc_lo +; GFX11-SDAG-NEXT: s_delay_alu instid0(VALU_DEP_1) +; GFX11-SDAG-NEXT: v_perm_b32 v0, v0, v1, 0x7060302 +; GFX11-SDAG-NEXT: s_setpc_b64 s[30:31] +; +; GFX11-GISEL-LABEL: fmul_select_v2bf16_test3: +; GFX11-GISEL: ; %bb.0: +; GFX11-GISEL-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX11-GISEL-NEXT: v_cmp_eq_u32_e32 vcc_lo, v1, v3 +; GFX11-GISEL-NEXT: v_mov_b32_e32 v2, 0x7fff +; GFX11-GISEL-NEXT: v_cndmask_b32_e64 v1, 0, 1, vcc_lo +; GFX11-GISEL-NEXT: s_delay_alu instid0(VALU_DEP_1) | instskip(NEXT) | instid1(VALU_DEP_1) +; GFX11-GISEL-NEXT: v_med3_i32 v1, 0xffff8000, v1, v2 +; GFX11-GISEL-NEXT: v_ldexp_f16_e32 v0, v0, v1 +; GFX11-GISEL-NEXT: s_setpc_b64 s[30:31] %bool = icmp eq <2 x i32> %bool.arg1, %bool.arg2 %y = select <2 x i1> %bool, <2 x bfloat> , <2 x bfloat> %ldexp = fmul <2 x bfloat> %x, %y @@ -1981,111 +2930,158 @@ define <2 x bfloat> @fmul_select_v2bf16_test3(<2 x bfloat> %x, <2 x i32> 
%bool.a } define <2 x bfloat> @fmul_select_v2bf16_test4(<2 x bfloat> %x, <2 x i32> %bool.arg1, <2 x i32> %bool.arg2) { -; GFX7-LABEL: fmul_select_v2bf16_test4: -; GFX7: ; %bb.0: -; GFX7-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX7-NEXT: v_cmp_eq_u32_e32 vcc, v2, v4 -; GFX7-NEXT: v_mul_f32_e32 v0, 1.0, v0 -; GFX7-NEXT: v_mul_f32_e32 v1, 1.0, v1 -; GFX7-NEXT: v_cndmask_b32_e64 v2, 1.0, 0.5, vcc -; GFX7-NEXT: v_cmp_eq_u32_e32 vcc, v3, v5 -; GFX7-NEXT: v_cndmask_b32_e64 v3, 1.0, 0.5, vcc -; GFX7-NEXT: v_and_b32_e32 v1, 0xffff0000, v1 -; GFX7-NEXT: v_and_b32_e32 v0, 0xffff0000, v0 -; GFX7-NEXT: v_mul_f32_e32 v1, v1, v3 -; GFX7-NEXT: v_mul_f32_e32 v0, v0, v2 -; GFX7-NEXT: v_and_b32_e32 v0, 0xffff0000, v0 -; GFX7-NEXT: v_and_b32_e32 v1, 0xffff0000, v1 -; GFX7-NEXT: s_setpc_b64 s[30:31] -; -; GFX9-LABEL: fmul_select_v2bf16_test4: -; GFX9: ; %bb.0: -; GFX9-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX9-NEXT: v_mov_b32_e32 v5, 0x3f80 -; GFX9-NEXT: v_mov_b32_e32 v6, 0x3f00 -; GFX9-NEXT: v_cmp_eq_u32_e32 vcc, v2, v4 -; GFX9-NEXT: v_cndmask_b32_e32 v2, v5, v6, vcc -; GFX9-NEXT: v_cmp_eq_u32_e32 vcc, v1, v3 -; GFX9-NEXT: v_cndmask_b32_e32 v1, v5, v6, vcc -; GFX9-NEXT: v_lshlrev_b32_e32 v3, 16, v0 -; GFX9-NEXT: v_lshlrev_b32_e32 v1, 16, v1 -; GFX9-NEXT: v_mul_f32_e32 v1, v3, v1 -; GFX9-NEXT: v_and_b32_e32 v0, 0xffff0000, v0 -; GFX9-NEXT: v_lshlrev_b32_e32 v2, 16, v2 -; GFX9-NEXT: v_bfe_u32 v3, v1, 16, 1 -; GFX9-NEXT: s_movk_i32 s4, 0x7fff -; GFX9-NEXT: v_mul_f32_e32 v0, v0, v2 -; GFX9-NEXT: v_add3_u32 v3, v3, v1, s4 -; GFX9-NEXT: v_or_b32_e32 v4, 0x400000, v1 -; GFX9-NEXT: v_cmp_u_f32_e32 vcc, v1, v1 -; GFX9-NEXT: v_bfe_u32 v2, v0, 16, 1 -; GFX9-NEXT: v_cndmask_b32_e32 v1, v3, v4, vcc -; GFX9-NEXT: v_add3_u32 v2, v2, v0, s4 -; GFX9-NEXT: v_or_b32_e32 v3, 0x400000, v0 -; GFX9-NEXT: v_cmp_u_f32_e32 vcc, v0, v0 -; GFX9-NEXT: v_cndmask_b32_e32 v0, v2, v3, vcc -; GFX9-NEXT: s_mov_b32 s4, 0x7060302 -; GFX9-NEXT: v_perm_b32 v0, v0, v1, s4 -; GFX9-NEXT: s_setpc_b64 
s[30:31] -; -; GFX1030-LABEL: fmul_select_v2bf16_test4: -; GFX1030: ; %bb.0: -; GFX1030-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX1030-NEXT: v_mov_b32_e32 v5, 0x3f00 -; GFX1030-NEXT: v_cmp_eq_u32_e32 vcc_lo, v1, v3 -; GFX1030-NEXT: v_lshlrev_b32_e32 v3, 16, v0 -; GFX1030-NEXT: v_and_b32_e32 v0, 0xffff0000, v0 -; GFX1030-NEXT: v_cndmask_b32_e32 v1, 0x3f80, v5, vcc_lo -; GFX1030-NEXT: v_cmp_eq_u32_e32 vcc_lo, v2, v4 -; GFX1030-NEXT: v_lshlrev_b32_e32 v1, 16, v1 -; GFX1030-NEXT: v_cndmask_b32_e32 v2, 0x3f80, v5, vcc_lo -; GFX1030-NEXT: v_mul_f32_e32 v1, v3, v1 -; GFX1030-NEXT: v_lshlrev_b32_e32 v2, 16, v2 -; GFX1030-NEXT: v_or_b32_e32 v4, 0x400000, v1 -; GFX1030-NEXT: v_mul_f32_e32 v0, v0, v2 -; GFX1030-NEXT: v_bfe_u32 v2, v1, 16, 1 -; GFX1030-NEXT: v_cmp_u_f32_e32 vcc_lo, v1, v1 -; GFX1030-NEXT: v_bfe_u32 v3, v0, 16, 1 -; GFX1030-NEXT: v_add3_u32 v2, v2, v1, 0x7fff -; GFX1030-NEXT: v_or_b32_e32 v5, 0x400000, v0 -; GFX1030-NEXT: v_add3_u32 v3, v3, v0, 0x7fff -; GFX1030-NEXT: v_cndmask_b32_e32 v1, v2, v4, vcc_lo -; GFX1030-NEXT: v_cmp_u_f32_e32 vcc_lo, v0, v0 -; GFX1030-NEXT: v_cndmask_b32_e32 v0, v3, v5, vcc_lo -; GFX1030-NEXT: v_perm_b32 v0, v0, v1, 0x7060302 -; GFX1030-NEXT: s_setpc_b64 s[30:31] -; -; GFX1100-LABEL: fmul_select_v2bf16_test4: -; GFX1100: ; %bb.0: -; GFX1100-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX1100-NEXT: v_mov_b32_e32 v5, 0x3f00 -; GFX1100-NEXT: v_cmp_eq_u32_e32 vcc_lo, v1, v3 -; GFX1100-NEXT: v_lshlrev_b32_e32 v3, 16, v0 -; GFX1100-NEXT: v_and_b32_e32 v0, 0xffff0000, v0 -; GFX1100-NEXT: s_delay_alu instid0(VALU_DEP_4) | instskip(SKIP_2) | instid1(VALU_DEP_1) -; GFX1100-NEXT: v_cndmask_b32_e32 v1, 0x3f80, v5, vcc_lo -; GFX1100-NEXT: v_cmp_eq_u32_e32 vcc_lo, v2, v4 -; GFX1100-NEXT: v_cndmask_b32_e32 v2, 0x3f80, v5, vcc_lo -; GFX1100-NEXT: v_lshlrev_b32_e32 v2, 16, v2 -; GFX1100-NEXT: s_delay_alu instid0(VALU_DEP_1) | instskip(NEXT) | instid1(VALU_DEP_1) -; GFX1100-NEXT: v_dual_mul_f32 v0, v0, v2 :: v_dual_lshlrev_b32 v1, 16, 
v1 -; GFX1100-NEXT: v_or_b32_e32 v5, 0x400000, v0 -; GFX1100-NEXT: s_delay_alu instid0(VALU_DEP_2) | instskip(SKIP_1) | instid1(VALU_DEP_2) -; GFX1100-NEXT: v_mul_f32_e32 v1, v3, v1 -; GFX1100-NEXT: v_bfe_u32 v3, v0, 16, 1 -; GFX1100-NEXT: v_bfe_u32 v2, v1, 16, 1 -; GFX1100-NEXT: v_or_b32_e32 v4, 0x400000, v1 -; GFX1100-NEXT: v_cmp_u_f32_e32 vcc_lo, v1, v1 -; GFX1100-NEXT: s_delay_alu instid0(VALU_DEP_4) | instskip(NEXT) | instid1(VALU_DEP_4) -; GFX1100-NEXT: v_add3_u32 v3, v3, v0, 0x7fff -; GFX1100-NEXT: v_add3_u32 v2, v2, v1, 0x7fff -; GFX1100-NEXT: s_delay_alu instid0(VALU_DEP_1) | instskip(SKIP_1) | instid1(VALU_DEP_4) -; GFX1100-NEXT: v_cndmask_b32_e32 v1, v2, v4, vcc_lo -; GFX1100-NEXT: v_cmp_u_f32_e32 vcc_lo, v0, v0 -; GFX1100-NEXT: v_cndmask_b32_e32 v0, v3, v5, vcc_lo -; GFX1100-NEXT: s_delay_alu instid0(VALU_DEP_1) -; GFX1100-NEXT: v_perm_b32 v0, v0, v1, 0x7060302 -; GFX1100-NEXT: s_setpc_b64 s[30:31] +; GFX7-SDAG-LABEL: fmul_select_v2bf16_test4: +; GFX7-SDAG: ; %bb.0: +; GFX7-SDAG-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX7-SDAG-NEXT: v_cmp_eq_u32_e32 vcc, v2, v4 +; GFX7-SDAG-NEXT: v_mul_f32_e32 v0, 1.0, v0 +; GFX7-SDAG-NEXT: v_mul_f32_e32 v1, 1.0, v1 +; GFX7-SDAG-NEXT: v_cndmask_b32_e64 v2, 1.0, 0.5, vcc +; GFX7-SDAG-NEXT: v_cmp_eq_u32_e32 vcc, v3, v5 +; GFX7-SDAG-NEXT: v_cndmask_b32_e64 v3, 1.0, 0.5, vcc +; GFX7-SDAG-NEXT: v_and_b32_e32 v1, 0xffff0000, v1 +; GFX7-SDAG-NEXT: v_and_b32_e32 v0, 0xffff0000, v0 +; GFX7-SDAG-NEXT: v_mul_f32_e32 v1, v1, v3 +; GFX7-SDAG-NEXT: v_mul_f32_e32 v0, v0, v2 +; GFX7-SDAG-NEXT: v_and_b32_e32 v0, 0xffff0000, v0 +; GFX7-SDAG-NEXT: v_and_b32_e32 v1, 0xffff0000, v1 +; GFX7-SDAG-NEXT: s_setpc_b64 s[30:31] +; +; GFX7-GISEL-LABEL: fmul_select_v2bf16_test4: +; GFX7-GISEL: ; %bb.0: +; GFX7-GISEL-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX7-GISEL-NEXT: v_cvt_f32_f16_e32 v0, v0 +; GFX7-GISEL-NEXT: v_cvt_f32_f16_e32 v1, v1 +; GFX7-GISEL-NEXT: v_cmp_eq_u32_e32 vcc, v2, v4 +; GFX7-GISEL-NEXT: v_cndmask_b32_e64 v2, 
0, -1, vcc +; GFX7-GISEL-NEXT: v_cmp_eq_u32_e32 vcc, v3, v5 +; GFX7-GISEL-NEXT: v_cndmask_b32_e64 v3, 0, -1, vcc +; GFX7-GISEL-NEXT: v_ldexp_f32_e32 v0, v0, v2 +; GFX7-GISEL-NEXT: v_ldexp_f32_e32 v1, v1, v3 +; GFX7-GISEL-NEXT: v_cvt_f16_f32_e32 v0, v0 +; GFX7-GISEL-NEXT: v_cvt_f16_f32_e32 v1, v1 +; GFX7-GISEL-NEXT: s_setpc_b64 s[30:31] +; +; GFX9-SDAG-LABEL: fmul_select_v2bf16_test4: +; GFX9-SDAG: ; %bb.0: +; GFX9-SDAG-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX9-SDAG-NEXT: v_mov_b32_e32 v5, 0x3f80 +; GFX9-SDAG-NEXT: v_mov_b32_e32 v6, 0x3f00 +; GFX9-SDAG-NEXT: v_cmp_eq_u32_e32 vcc, v2, v4 +; GFX9-SDAG-NEXT: v_cndmask_b32_e32 v2, v5, v6, vcc +; GFX9-SDAG-NEXT: v_cmp_eq_u32_e32 vcc, v1, v3 +; GFX9-SDAG-NEXT: v_cndmask_b32_e32 v1, v5, v6, vcc +; GFX9-SDAG-NEXT: v_lshlrev_b32_e32 v3, 16, v0 +; GFX9-SDAG-NEXT: v_lshlrev_b32_e32 v1, 16, v1 +; GFX9-SDAG-NEXT: v_mul_f32_e32 v1, v3, v1 +; GFX9-SDAG-NEXT: v_and_b32_e32 v0, 0xffff0000, v0 +; GFX9-SDAG-NEXT: v_lshlrev_b32_e32 v2, 16, v2 +; GFX9-SDAG-NEXT: v_bfe_u32 v3, v1, 16, 1 +; GFX9-SDAG-NEXT: s_movk_i32 s4, 0x7fff +; GFX9-SDAG-NEXT: v_mul_f32_e32 v0, v0, v2 +; GFX9-SDAG-NEXT: v_add3_u32 v3, v3, v1, s4 +; GFX9-SDAG-NEXT: v_or_b32_e32 v4, 0x400000, v1 +; GFX9-SDAG-NEXT: v_cmp_u_f32_e32 vcc, v1, v1 +; GFX9-SDAG-NEXT: v_bfe_u32 v2, v0, 16, 1 +; GFX9-SDAG-NEXT: v_cndmask_b32_e32 v1, v3, v4, vcc +; GFX9-SDAG-NEXT: v_add3_u32 v2, v2, v0, s4 +; GFX9-SDAG-NEXT: v_or_b32_e32 v3, 0x400000, v0 +; GFX9-SDAG-NEXT: v_cmp_u_f32_e32 vcc, v0, v0 +; GFX9-SDAG-NEXT: v_cndmask_b32_e32 v0, v2, v3, vcc +; GFX9-SDAG-NEXT: s_mov_b32 s4, 0x7060302 +; GFX9-SDAG-NEXT: v_perm_b32 v0, v0, v1, s4 +; GFX9-SDAG-NEXT: s_setpc_b64 s[30:31] +; +; GFX9-GISEL-LABEL: fmul_select_v2bf16_test4: +; GFX9-GISEL: ; %bb.0: +; GFX9-GISEL-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX9-GISEL-NEXT: v_cmp_eq_u32_e32 vcc, v1, v3 +; GFX9-GISEL-NEXT: v_cndmask_b32_e64 v1, 0, -1, vcc +; GFX9-GISEL-NEXT: v_mov_b32_e32 v2, 0xffff8000 +; GFX9-GISEL-NEXT: 
v_mov_b32_e32 v3, 0x7fff +; GFX9-GISEL-NEXT: v_med3_i32 v1, v1, v2, v3 +; GFX9-GISEL-NEXT: v_ldexp_f16_e32 v0, v0, v1 +; GFX9-GISEL-NEXT: s_setpc_b64 s[30:31] +; +; GFX10-SDAG-LABEL: fmul_select_v2bf16_test4: +; GFX10-SDAG: ; %bb.0: +; GFX10-SDAG-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX10-SDAG-NEXT: v_mov_b32_e32 v5, 0x3f00 +; GFX10-SDAG-NEXT: v_cmp_eq_u32_e32 vcc_lo, v1, v3 +; GFX10-SDAG-NEXT: v_lshlrev_b32_e32 v3, 16, v0 +; GFX10-SDAG-NEXT: v_and_b32_e32 v0, 0xffff0000, v0 +; GFX10-SDAG-NEXT: v_cndmask_b32_e32 v1, 0x3f80, v5, vcc_lo +; GFX10-SDAG-NEXT: v_cmp_eq_u32_e32 vcc_lo, v2, v4 +; GFX10-SDAG-NEXT: v_lshlrev_b32_e32 v1, 16, v1 +; GFX10-SDAG-NEXT: v_cndmask_b32_e32 v2, 0x3f80, v5, vcc_lo +; GFX10-SDAG-NEXT: v_mul_f32_e32 v1, v3, v1 +; GFX10-SDAG-NEXT: v_lshlrev_b32_e32 v2, 16, v2 +; GFX10-SDAG-NEXT: v_or_b32_e32 v4, 0x400000, v1 +; GFX10-SDAG-NEXT: v_mul_f32_e32 v0, v0, v2 +; GFX10-SDAG-NEXT: v_bfe_u32 v2, v1, 16, 1 +; GFX10-SDAG-NEXT: v_cmp_u_f32_e32 vcc_lo, v1, v1 +; GFX10-SDAG-NEXT: v_bfe_u32 v3, v0, 16, 1 +; GFX10-SDAG-NEXT: v_add3_u32 v2, v2, v1, 0x7fff +; GFX10-SDAG-NEXT: v_or_b32_e32 v5, 0x400000, v0 +; GFX10-SDAG-NEXT: v_add3_u32 v3, v3, v0, 0x7fff +; GFX10-SDAG-NEXT: v_cndmask_b32_e32 v1, v2, v4, vcc_lo +; GFX10-SDAG-NEXT: v_cmp_u_f32_e32 vcc_lo, v0, v0 +; GFX10-SDAG-NEXT: v_cndmask_b32_e32 v0, v3, v5, vcc_lo +; GFX10-SDAG-NEXT: v_perm_b32 v0, v0, v1, 0x7060302 +; GFX10-SDAG-NEXT: s_setpc_b64 s[30:31] +; +; GFX10-GISEL-LABEL: fmul_select_v2bf16_test4: +; GFX10-GISEL: ; %bb.0: +; GFX10-GISEL-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX10-GISEL-NEXT: v_cmp_eq_u32_e32 vcc_lo, v1, v3 +; GFX10-GISEL-NEXT: v_mov_b32_e32 v2, 0x7fff +; GFX10-GISEL-NEXT: v_cndmask_b32_e64 v1, 0, -1, vcc_lo +; GFX10-GISEL-NEXT: v_med3_i32 v1, 0xffff8000, v1, v2 +; GFX10-GISEL-NEXT: v_ldexp_f16_e32 v0, v0, v1 +; GFX10-GISEL-NEXT: s_setpc_b64 s[30:31] +; +; GFX11-SDAG-LABEL: fmul_select_v2bf16_test4: +; GFX11-SDAG: ; %bb.0: +; GFX11-SDAG-NEXT: s_waitcnt 
vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX11-SDAG-NEXT: v_mov_b32_e32 v5, 0x3f00 +; GFX11-SDAG-NEXT: v_cmp_eq_u32_e32 vcc_lo, v1, v3 +; GFX11-SDAG-NEXT: v_lshlrev_b32_e32 v3, 16, v0 +; GFX11-SDAG-NEXT: v_and_b32_e32 v0, 0xffff0000, v0 +; GFX11-SDAG-NEXT: s_delay_alu instid0(VALU_DEP_4) | instskip(SKIP_2) | instid1(VALU_DEP_1) +; GFX11-SDAG-NEXT: v_cndmask_b32_e32 v1, 0x3f80, v5, vcc_lo +; GFX11-SDAG-NEXT: v_cmp_eq_u32_e32 vcc_lo, v2, v4 +; GFX11-SDAG-NEXT: v_cndmask_b32_e32 v2, 0x3f80, v5, vcc_lo +; GFX11-SDAG-NEXT: v_lshlrev_b32_e32 v2, 16, v2 +; GFX11-SDAG-NEXT: s_delay_alu instid0(VALU_DEP_1) | instskip(NEXT) | instid1(VALU_DEP_1) +; GFX11-SDAG-NEXT: v_dual_mul_f32 v0, v0, v2 :: v_dual_lshlrev_b32 v1, 16, v1 +; GFX11-SDAG-NEXT: v_or_b32_e32 v5, 0x400000, v0 +; GFX11-SDAG-NEXT: s_delay_alu instid0(VALU_DEP_2) | instskip(SKIP_1) | instid1(VALU_DEP_2) +; GFX11-SDAG-NEXT: v_mul_f32_e32 v1, v3, v1 +; GFX11-SDAG-NEXT: v_bfe_u32 v3, v0, 16, 1 +; GFX11-SDAG-NEXT: v_bfe_u32 v2, v1, 16, 1 +; GFX11-SDAG-NEXT: v_or_b32_e32 v4, 0x400000, v1 +; GFX11-SDAG-NEXT: v_cmp_u_f32_e32 vcc_lo, v1, v1 +; GFX11-SDAG-NEXT: s_delay_alu instid0(VALU_DEP_4) | instskip(NEXT) | instid1(VALU_DEP_4) +; GFX11-SDAG-NEXT: v_add3_u32 v3, v3, v0, 0x7fff +; GFX11-SDAG-NEXT: v_add3_u32 v2, v2, v1, 0x7fff +; GFX11-SDAG-NEXT: s_delay_alu instid0(VALU_DEP_1) | instskip(SKIP_1) | instid1(VALU_DEP_4) +; GFX11-SDAG-NEXT: v_cndmask_b32_e32 v1, v2, v4, vcc_lo +; GFX11-SDAG-NEXT: v_cmp_u_f32_e32 vcc_lo, v0, v0 +; GFX11-SDAG-NEXT: v_cndmask_b32_e32 v0, v3, v5, vcc_lo +; GFX11-SDAG-NEXT: s_delay_alu instid0(VALU_DEP_1) +; GFX11-SDAG-NEXT: v_perm_b32 v0, v0, v1, 0x7060302 +; GFX11-SDAG-NEXT: s_setpc_b64 s[30:31] +; +; GFX11-GISEL-LABEL: fmul_select_v2bf16_test4: +; GFX11-GISEL: ; %bb.0: +; GFX11-GISEL-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX11-GISEL-NEXT: v_cmp_eq_u32_e32 vcc_lo, v1, v3 +; GFX11-GISEL-NEXT: v_mov_b32_e32 v2, 0x7fff +; GFX11-GISEL-NEXT: v_cndmask_b32_e64 v1, 0, -1, vcc_lo +; 
GFX11-GISEL-NEXT: s_delay_alu instid0(VALU_DEP_1) | instskip(NEXT) | instid1(VALU_DEP_1) +; GFX11-GISEL-NEXT: v_med3_i32 v1, 0xffff8000, v1, v2 +; GFX11-GISEL-NEXT: v_ldexp_f16_e32 v0, v0, v1 +; GFX11-GISEL-NEXT: s_setpc_b64 s[30:31] %bool = icmp eq <2 x i32> %bool.arg1, %bool.arg2 %y = select <2 x i1> %bool, <2 x bfloat> , <2 x bfloat> %ldexp = fmul <2 x bfloat> %x, %y @@ -2093,73 +3089,108 @@ define <2 x bfloat> @fmul_select_v2bf16_test4(<2 x bfloat> %x, <2 x i32> %bool.a } define bfloat @fmul_select_bf16_test5(bfloat %x, i32 %bool.arg1, i32 %bool.arg2) { -; GFX7-LABEL: fmul_select_bf16_test5: -; GFX7: ; %bb.0: -; GFX7-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX7-NEXT: v_mul_f32_e32 v0, 1.0, v0 -; GFX7-NEXT: v_mov_b32_e32 v3, 0x41000000 -; GFX7-NEXT: v_cmp_eq_u32_e32 vcc, v1, v2 -; GFX7-NEXT: v_cndmask_b32_e64 v1, v3, 2.0, vcc -; GFX7-NEXT: v_and_b32_e32 v0, 0xffff0000, v0 -; GFX7-NEXT: v_mul_f32_e32 v0, v0, v1 -; GFX7-NEXT: v_and_b32_e32 v0, 0xffff0000, v0 -; GFX7-NEXT: s_setpc_b64 s[30:31] -; -; GFX9-LABEL: fmul_select_bf16_test5: -; GFX9: ; %bb.0: -; GFX9-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX9-NEXT: v_mov_b32_e32 v3, 0x4100 -; GFX9-NEXT: v_mov_b32_e32 v4, 0x4000 -; GFX9-NEXT: v_cmp_eq_u32_e32 vcc, v1, v2 -; GFX9-NEXT: v_cndmask_b32_e32 v1, v3, v4, vcc -; GFX9-NEXT: v_lshlrev_b32_e32 v1, 16, v1 -; GFX9-NEXT: v_lshlrev_b32_e32 v0, 16, v0 -; GFX9-NEXT: v_mul_f32_e32 v0, v0, v1 -; GFX9-NEXT: v_bfe_u32 v1, v0, 16, 1 -; GFX9-NEXT: s_movk_i32 s4, 0x7fff -; GFX9-NEXT: v_add3_u32 v1, v1, v0, s4 -; GFX9-NEXT: v_or_b32_e32 v2, 0x400000, v0 -; GFX9-NEXT: v_cmp_u_f32_e32 vcc, v0, v0 -; GFX9-NEXT: v_cndmask_b32_e32 v0, v1, v2, vcc -; GFX9-NEXT: v_lshrrev_b32_e32 v0, 16, v0 -; GFX9-NEXT: s_setpc_b64 s[30:31] -; -; GFX1030-LABEL: fmul_select_bf16_test5: -; GFX1030: ; %bb.0: -; GFX1030-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX1030-NEXT: v_mov_b32_e32 v3, 0x4000 -; GFX1030-NEXT: v_cmp_eq_u32_e32 vcc_lo, v1, v2 -; GFX1030-NEXT: 
v_lshlrev_b32_e32 v0, 16, v0 -; GFX1030-NEXT: v_cndmask_b32_e32 v1, 0x4100, v3, vcc_lo -; GFX1030-NEXT: v_lshlrev_b32_e32 v1, 16, v1 -; GFX1030-NEXT: v_mul_f32_e32 v0, v0, v1 -; GFX1030-NEXT: v_bfe_u32 v1, v0, 16, 1 -; GFX1030-NEXT: v_or_b32_e32 v2, 0x400000, v0 -; GFX1030-NEXT: v_cmp_u_f32_e32 vcc_lo, v0, v0 -; GFX1030-NEXT: v_add3_u32 v1, v1, v0, 0x7fff -; GFX1030-NEXT: v_cndmask_b32_e32 v0, v1, v2, vcc_lo -; GFX1030-NEXT: v_lshrrev_b32_e32 v0, 16, v0 -; GFX1030-NEXT: s_setpc_b64 s[30:31] -; -; GFX1100-LABEL: fmul_select_bf16_test5: -; GFX1100: ; %bb.0: -; GFX1100-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX1100-NEXT: v_dual_mov_b32 v3, 0x4000 :: v_dual_lshlrev_b32 v0, 16, v0 -; GFX1100-NEXT: v_cmp_eq_u32_e32 vcc_lo, v1, v2 -; GFX1100-NEXT: s_delay_alu instid0(VALU_DEP_2) | instskip(NEXT) | instid1(VALU_DEP_1) -; GFX1100-NEXT: v_cndmask_b32_e32 v1, 0x4100, v3, vcc_lo -; GFX1100-NEXT: v_lshlrev_b32_e32 v1, 16, v1 -; GFX1100-NEXT: s_delay_alu instid0(VALU_DEP_1) | instskip(NEXT) | instid1(VALU_DEP_1) -; GFX1100-NEXT: v_mul_f32_e32 v0, v0, v1 -; GFX1100-NEXT: v_bfe_u32 v1, v0, 16, 1 -; GFX1100-NEXT: v_or_b32_e32 v2, 0x400000, v0 -; GFX1100-NEXT: v_cmp_u_f32_e32 vcc_lo, v0, v0 -; GFX1100-NEXT: s_delay_alu instid0(VALU_DEP_3) | instskip(NEXT) | instid1(VALU_DEP_1) -; GFX1100-NEXT: v_add3_u32 v1, v1, v0, 0x7fff -; GFX1100-NEXT: v_cndmask_b32_e32 v0, v1, v2, vcc_lo -; GFX1100-NEXT: s_delay_alu instid0(VALU_DEP_1) -; GFX1100-NEXT: v_lshrrev_b32_e32 v0, 16, v0 -; GFX1100-NEXT: s_setpc_b64 s[30:31] +; GFX7-SDAG-LABEL: fmul_select_bf16_test5: +; GFX7-SDAG: ; %bb.0: +; GFX7-SDAG-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX7-SDAG-NEXT: v_mul_f32_e32 v0, 1.0, v0 +; GFX7-SDAG-NEXT: v_mov_b32_e32 v3, 0x41000000 +; GFX7-SDAG-NEXT: v_cmp_eq_u32_e32 vcc, v1, v2 +; GFX7-SDAG-NEXT: v_cndmask_b32_e64 v1, v3, 2.0, vcc +; GFX7-SDAG-NEXT: v_and_b32_e32 v0, 0xffff0000, v0 +; GFX7-SDAG-NEXT: v_mul_f32_e32 v0, v0, v1 +; GFX7-SDAG-NEXT: v_and_b32_e32 v0, 0xffff0000, v0 +; 
GFX7-SDAG-NEXT: s_setpc_b64 s[30:31] +; +; GFX7-GISEL-LABEL: fmul_select_bf16_test5: +; GFX7-GISEL: ; %bb.0: +; GFX7-GISEL-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX7-GISEL-NEXT: v_cvt_f32_f16_e32 v0, v0 +; GFX7-GISEL-NEXT: v_cmp_eq_u32_e32 vcc, v1, v2 +; GFX7-GISEL-NEXT: v_cndmask_b32_e64 v1, 3, 1, vcc +; GFX7-GISEL-NEXT: v_ldexp_f32_e32 v0, v0, v1 +; GFX7-GISEL-NEXT: v_cvt_f16_f32_e32 v0, v0 +; GFX7-GISEL-NEXT: s_setpc_b64 s[30:31] +; +; GFX9-SDAG-LABEL: fmul_select_bf16_test5: +; GFX9-SDAG: ; %bb.0: +; GFX9-SDAG-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX9-SDAG-NEXT: v_mov_b32_e32 v3, 0x4100 +; GFX9-SDAG-NEXT: v_mov_b32_e32 v4, 0x4000 +; GFX9-SDAG-NEXT: v_cmp_eq_u32_e32 vcc, v1, v2 +; GFX9-SDAG-NEXT: v_cndmask_b32_e32 v1, v3, v4, vcc +; GFX9-SDAG-NEXT: v_lshlrev_b32_e32 v1, 16, v1 +; GFX9-SDAG-NEXT: v_lshlrev_b32_e32 v0, 16, v0 +; GFX9-SDAG-NEXT: v_mul_f32_e32 v0, v0, v1 +; GFX9-SDAG-NEXT: v_bfe_u32 v1, v0, 16, 1 +; GFX9-SDAG-NEXT: s_movk_i32 s4, 0x7fff +; GFX9-SDAG-NEXT: v_add3_u32 v1, v1, v0, s4 +; GFX9-SDAG-NEXT: v_or_b32_e32 v2, 0x400000, v0 +; GFX9-SDAG-NEXT: v_cmp_u_f32_e32 vcc, v0, v0 +; GFX9-SDAG-NEXT: v_cndmask_b32_e32 v0, v1, v2, vcc +; GFX9-SDAG-NEXT: v_lshrrev_b32_e32 v0, 16, v0 +; GFX9-SDAG-NEXT: s_setpc_b64 s[30:31] +; +; GFX9-GISEL-LABEL: fmul_select_bf16_test5: +; GFX9-GISEL: ; %bb.0: +; GFX9-GISEL-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX9-GISEL-NEXT: v_cmp_eq_u32_e32 vcc, v1, v2 +; GFX9-GISEL-NEXT: v_cndmask_b32_e64 v1, 3, 1, vcc +; GFX9-GISEL-NEXT: v_ldexp_f16_e32 v0, v0, v1 +; GFX9-GISEL-NEXT: s_setpc_b64 s[30:31] +; +; GFX10-SDAG-LABEL: fmul_select_bf16_test5: +; GFX10-SDAG: ; %bb.0: +; GFX10-SDAG-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX10-SDAG-NEXT: v_mov_b32_e32 v3, 0x4000 +; GFX10-SDAG-NEXT: v_cmp_eq_u32_e32 vcc_lo, v1, v2 +; GFX10-SDAG-NEXT: v_lshlrev_b32_e32 v0, 16, v0 +; GFX10-SDAG-NEXT: v_cndmask_b32_e32 v1, 0x4100, v3, vcc_lo +; GFX10-SDAG-NEXT: v_lshlrev_b32_e32 v1, 16, v1 +; 
GFX10-SDAG-NEXT: v_mul_f32_e32 v0, v0, v1 +; GFX10-SDAG-NEXT: v_bfe_u32 v1, v0, 16, 1 +; GFX10-SDAG-NEXT: v_or_b32_e32 v2, 0x400000, v0 +; GFX10-SDAG-NEXT: v_cmp_u_f32_e32 vcc_lo, v0, v0 +; GFX10-SDAG-NEXT: v_add3_u32 v1, v1, v0, 0x7fff +; GFX10-SDAG-NEXT: v_cndmask_b32_e32 v0, v1, v2, vcc_lo +; GFX10-SDAG-NEXT: v_lshrrev_b32_e32 v0, 16, v0 +; GFX10-SDAG-NEXT: s_setpc_b64 s[30:31] +; +; GFX10-GISEL-LABEL: fmul_select_bf16_test5: +; GFX10-GISEL: ; %bb.0: +; GFX10-GISEL-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX10-GISEL-NEXT: v_cmp_eq_u32_e32 vcc_lo, v1, v2 +; GFX10-GISEL-NEXT: v_cndmask_b32_e64 v1, 3, 1, vcc_lo +; GFX10-GISEL-NEXT: v_ldexp_f16_e32 v0, v0, v1 +; GFX10-GISEL-NEXT: s_setpc_b64 s[30:31] +; +; GFX11-SDAG-LABEL: fmul_select_bf16_test5: +; GFX11-SDAG: ; %bb.0: +; GFX11-SDAG-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX11-SDAG-NEXT: v_dual_mov_b32 v3, 0x4000 :: v_dual_lshlrev_b32 v0, 16, v0 +; GFX11-SDAG-NEXT: v_cmp_eq_u32_e32 vcc_lo, v1, v2 +; GFX11-SDAG-NEXT: s_delay_alu instid0(VALU_DEP_2) | instskip(NEXT) | instid1(VALU_DEP_1) +; GFX11-SDAG-NEXT: v_cndmask_b32_e32 v1, 0x4100, v3, vcc_lo +; GFX11-SDAG-NEXT: v_lshlrev_b32_e32 v1, 16, v1 +; GFX11-SDAG-NEXT: s_delay_alu instid0(VALU_DEP_1) | instskip(NEXT) | instid1(VALU_DEP_1) +; GFX11-SDAG-NEXT: v_mul_f32_e32 v0, v0, v1 +; GFX11-SDAG-NEXT: v_bfe_u32 v1, v0, 16, 1 +; GFX11-SDAG-NEXT: v_or_b32_e32 v2, 0x400000, v0 +; GFX11-SDAG-NEXT: v_cmp_u_f32_e32 vcc_lo, v0, v0 +; GFX11-SDAG-NEXT: s_delay_alu instid0(VALU_DEP_3) | instskip(NEXT) | instid1(VALU_DEP_1) +; GFX11-SDAG-NEXT: v_add3_u32 v1, v1, v0, 0x7fff +; GFX11-SDAG-NEXT: v_cndmask_b32_e32 v0, v1, v2, vcc_lo +; GFX11-SDAG-NEXT: s_delay_alu instid0(VALU_DEP_1) +; GFX11-SDAG-NEXT: v_lshrrev_b32_e32 v0, 16, v0 +; GFX11-SDAG-NEXT: s_setpc_b64 s[30:31] +; +; GFX11-GISEL-LABEL: fmul_select_bf16_test5: +; GFX11-GISEL: ; %bb.0: +; GFX11-GISEL-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX11-GISEL-NEXT: v_cmp_eq_u32_e32 vcc_lo, v1, v2 +; 
GFX11-GISEL-NEXT: v_cndmask_b32_e64 v1, 3, 1, vcc_lo +; GFX11-GISEL-NEXT: s_delay_alu instid0(VALU_DEP_1) +; GFX11-GISEL-NEXT: v_ldexp_f16_e32 v0, v0, v1 +; GFX11-GISEL-NEXT: s_setpc_b64 s[30:31] %bool = icmp eq i32 %bool.arg1, %bool.arg2 %y = select i1 %bool, bfloat 2.000000e+00, bfloat 8.000000e+00 %ldexp = fmul bfloat %x, %y @@ -2167,74 +3198,116 @@ define bfloat @fmul_select_bf16_test5(bfloat %x, i32 %bool.arg1, i32 %bool.arg2) } define bfloat @fmul_select_bf16_test6(bfloat %x, i32 %bool.arg1, i32 %bool.arg2) { -; GFX7-LABEL: fmul_select_bf16_test6: -; GFX7: ; %bb.0: -; GFX7-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX7-NEXT: v_mul_f32_e32 v0, 1.0, v0 -; GFX7-NEXT: v_mov_b32_e32 v3, 0x40400000 -; GFX7-NEXT: v_mov_b32_e32 v4, 0xc1000000 -; GFX7-NEXT: v_cmp_eq_u32_e32 vcc, v1, v2 -; GFX7-NEXT: v_cndmask_b32_e32 v1, v3, v4, vcc -; GFX7-NEXT: v_and_b32_e32 v0, 0xffff0000, v0 -; GFX7-NEXT: v_mul_f32_e32 v0, v0, v1 -; GFX7-NEXT: v_and_b32_e32 v0, 0xffff0000, v0 -; GFX7-NEXT: s_setpc_b64 s[30:31] -; -; GFX9-LABEL: fmul_select_bf16_test6: -; GFX9: ; %bb.0: -; GFX9-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX9-NEXT: v_mov_b32_e32 v3, 0x4040 -; GFX9-NEXT: v_mov_b32_e32 v4, 0xffffc100 -; GFX9-NEXT: v_cmp_eq_u32_e32 vcc, v1, v2 -; GFX9-NEXT: v_cndmask_b32_e32 v1, v3, v4, vcc -; GFX9-NEXT: v_lshlrev_b32_e32 v1, 16, v1 -; GFX9-NEXT: v_lshlrev_b32_e32 v0, 16, v0 -; GFX9-NEXT: v_mul_f32_e32 v0, v0, v1 -; GFX9-NEXT: v_bfe_u32 v1, v0, 16, 1 -; GFX9-NEXT: s_movk_i32 s4, 0x7fff -; GFX9-NEXT: v_add3_u32 v1, v1, v0, s4 -; GFX9-NEXT: v_or_b32_e32 v2, 0x400000, v0 -; GFX9-NEXT: v_cmp_u_f32_e32 vcc, v0, v0 -; GFX9-NEXT: v_cndmask_b32_e32 v0, v1, v2, vcc -; GFX9-NEXT: v_lshrrev_b32_e32 v0, 16, v0 -; GFX9-NEXT: s_setpc_b64 s[30:31] -; -; GFX1030-LABEL: fmul_select_bf16_test6: -; GFX1030: ; %bb.0: -; GFX1030-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX1030-NEXT: v_mov_b32_e32 v3, 0xffffc100 -; GFX1030-NEXT: v_cmp_eq_u32_e32 vcc_lo, v1, v2 -; GFX1030-NEXT: 
v_lshlrev_b32_e32 v0, 16, v0 -; GFX1030-NEXT: v_cndmask_b32_e32 v1, 0x4040, v3, vcc_lo -; GFX1030-NEXT: v_lshlrev_b32_e32 v1, 16, v1 -; GFX1030-NEXT: v_mul_f32_e32 v0, v0, v1 -; GFX1030-NEXT: v_bfe_u32 v1, v0, 16, 1 -; GFX1030-NEXT: v_or_b32_e32 v2, 0x400000, v0 -; GFX1030-NEXT: v_cmp_u_f32_e32 vcc_lo, v0, v0 -; GFX1030-NEXT: v_add3_u32 v1, v1, v0, 0x7fff -; GFX1030-NEXT: v_cndmask_b32_e32 v0, v1, v2, vcc_lo -; GFX1030-NEXT: v_lshrrev_b32_e32 v0, 16, v0 -; GFX1030-NEXT: s_setpc_b64 s[30:31] -; -; GFX1100-LABEL: fmul_select_bf16_test6: -; GFX1100: ; %bb.0: -; GFX1100-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX1100-NEXT: v_dual_mov_b32 v3, 0xffffc100 :: v_dual_lshlrev_b32 v0, 16, v0 -; GFX1100-NEXT: v_cmp_eq_u32_e32 vcc_lo, v1, v2 -; GFX1100-NEXT: s_delay_alu instid0(VALU_DEP_2) | instskip(NEXT) | instid1(VALU_DEP_1) -; GFX1100-NEXT: v_cndmask_b32_e32 v1, 0x4040, v3, vcc_lo -; GFX1100-NEXT: v_lshlrev_b32_e32 v1, 16, v1 -; GFX1100-NEXT: s_delay_alu instid0(VALU_DEP_1) | instskip(NEXT) | instid1(VALU_DEP_1) -; GFX1100-NEXT: v_mul_f32_e32 v0, v0, v1 -; GFX1100-NEXT: v_bfe_u32 v1, v0, 16, 1 -; GFX1100-NEXT: v_or_b32_e32 v2, 0x400000, v0 -; GFX1100-NEXT: v_cmp_u_f32_e32 vcc_lo, v0, v0 -; GFX1100-NEXT: s_delay_alu instid0(VALU_DEP_3) | instskip(NEXT) | instid1(VALU_DEP_1) -; GFX1100-NEXT: v_add3_u32 v1, v1, v0, 0x7fff -; GFX1100-NEXT: v_cndmask_b32_e32 v0, v1, v2, vcc_lo -; GFX1100-NEXT: s_delay_alu instid0(VALU_DEP_1) -; GFX1100-NEXT: v_lshrrev_b32_e32 v0, 16, v0 -; GFX1100-NEXT: s_setpc_b64 s[30:31] +; GFX7-SDAG-LABEL: fmul_select_bf16_test6: +; GFX7-SDAG: ; %bb.0: +; GFX7-SDAG-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX7-SDAG-NEXT: v_mul_f32_e32 v0, 1.0, v0 +; GFX7-SDAG-NEXT: v_mov_b32_e32 v3, 0x40400000 +; GFX7-SDAG-NEXT: v_mov_b32_e32 v4, 0xc1000000 +; GFX7-SDAG-NEXT: v_cmp_eq_u32_e32 vcc, v1, v2 +; GFX7-SDAG-NEXT: v_cndmask_b32_e32 v1, v3, v4, vcc +; GFX7-SDAG-NEXT: v_and_b32_e32 v0, 0xffff0000, v0 +; GFX7-SDAG-NEXT: v_mul_f32_e32 v0, v0, v1 +; 
GFX7-SDAG-NEXT: v_and_b32_e32 v0, 0xffff0000, v0 +; GFX7-SDAG-NEXT: s_setpc_b64 s[30:31] +; +; GFX7-GISEL-LABEL: fmul_select_bf16_test6: +; GFX7-GISEL: ; %bb.0: +; GFX7-GISEL-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX7-GISEL-NEXT: v_mov_b32_e32 v3, 0xc100 +; GFX7-GISEL-NEXT: v_mov_b32_e32 v4, 0x4040 +; GFX7-GISEL-NEXT: v_cmp_eq_u32_e32 vcc, v1, v2 +; GFX7-GISEL-NEXT: v_cndmask_b32_e32 v1, v4, v3, vcc +; GFX7-GISEL-NEXT: v_cvt_f32_f16_e32 v0, v0 +; GFX7-GISEL-NEXT: v_cvt_f32_f16_e32 v1, v1 +; GFX7-GISEL-NEXT: v_mul_f32_e32 v0, v0, v1 +; GFX7-GISEL-NEXT: v_cvt_f16_f32_e32 v0, v0 +; GFX7-GISEL-NEXT: s_setpc_b64 s[30:31] +; +; GFX9-SDAG-LABEL: fmul_select_bf16_test6: +; GFX9-SDAG: ; %bb.0: +; GFX9-SDAG-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX9-SDAG-NEXT: v_mov_b32_e32 v3, 0x4040 +; GFX9-SDAG-NEXT: v_mov_b32_e32 v4, 0xffffc100 +; GFX9-SDAG-NEXT: v_cmp_eq_u32_e32 vcc, v1, v2 +; GFX9-SDAG-NEXT: v_cndmask_b32_e32 v1, v3, v4, vcc +; GFX9-SDAG-NEXT: v_lshlrev_b32_e32 v1, 16, v1 +; GFX9-SDAG-NEXT: v_lshlrev_b32_e32 v0, 16, v0 +; GFX9-SDAG-NEXT: v_mul_f32_e32 v0, v0, v1 +; GFX9-SDAG-NEXT: v_bfe_u32 v1, v0, 16, 1 +; GFX9-SDAG-NEXT: s_movk_i32 s4, 0x7fff +; GFX9-SDAG-NEXT: v_add3_u32 v1, v1, v0, s4 +; GFX9-SDAG-NEXT: v_or_b32_e32 v2, 0x400000, v0 +; GFX9-SDAG-NEXT: v_cmp_u_f32_e32 vcc, v0, v0 +; GFX9-SDAG-NEXT: v_cndmask_b32_e32 v0, v1, v2, vcc +; GFX9-SDAG-NEXT: v_lshrrev_b32_e32 v0, 16, v0 +; GFX9-SDAG-NEXT: s_setpc_b64 s[30:31] +; +; GFX9-GISEL-LABEL: fmul_select_bf16_test6: +; GFX9-GISEL: ; %bb.0: +; GFX9-GISEL-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX9-GISEL-NEXT: v_mov_b32_e32 v3, 0xc100 +; GFX9-GISEL-NEXT: v_mov_b32_e32 v4, 0x4040 +; GFX9-GISEL-NEXT: v_cmp_eq_u32_e32 vcc, v1, v2 +; GFX9-GISEL-NEXT: v_cndmask_b32_e32 v1, v4, v3, vcc +; GFX9-GISEL-NEXT: v_mul_f16_e32 v0, v0, v1 +; GFX9-GISEL-NEXT: s_setpc_b64 s[30:31] +; +; GFX10-SDAG-LABEL: fmul_select_bf16_test6: +; GFX10-SDAG: ; %bb.0: +; GFX10-SDAG-NEXT: s_waitcnt vmcnt(0) expcnt(0) 
lgkmcnt(0) +; GFX10-SDAG-NEXT: v_mov_b32_e32 v3, 0xffffc100 +; GFX10-SDAG-NEXT: v_cmp_eq_u32_e32 vcc_lo, v1, v2 +; GFX10-SDAG-NEXT: v_lshlrev_b32_e32 v0, 16, v0 +; GFX10-SDAG-NEXT: v_cndmask_b32_e32 v1, 0x4040, v3, vcc_lo +; GFX10-SDAG-NEXT: v_lshlrev_b32_e32 v1, 16, v1 +; GFX10-SDAG-NEXT: v_mul_f32_e32 v0, v0, v1 +; GFX10-SDAG-NEXT: v_bfe_u32 v1, v0, 16, 1 +; GFX10-SDAG-NEXT: v_or_b32_e32 v2, 0x400000, v0 +; GFX10-SDAG-NEXT: v_cmp_u_f32_e32 vcc_lo, v0, v0 +; GFX10-SDAG-NEXT: v_add3_u32 v1, v1, v0, 0x7fff +; GFX10-SDAG-NEXT: v_cndmask_b32_e32 v0, v1, v2, vcc_lo +; GFX10-SDAG-NEXT: v_lshrrev_b32_e32 v0, 16, v0 +; GFX10-SDAG-NEXT: s_setpc_b64 s[30:31] +; +; GFX10-GISEL-LABEL: fmul_select_bf16_test6: +; GFX10-GISEL: ; %bb.0: +; GFX10-GISEL-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX10-GISEL-NEXT: v_mov_b32_e32 v3, 0x4040 +; GFX10-GISEL-NEXT: v_cmp_eq_u32_e32 vcc_lo, v1, v2 +; GFX10-GISEL-NEXT: v_cndmask_b32_e64 v1, v3, 0xc100, vcc_lo +; GFX10-GISEL-NEXT: v_mul_f16_e32 v0, v0, v1 +; GFX10-GISEL-NEXT: s_setpc_b64 s[30:31] +; +; GFX11-SDAG-LABEL: fmul_select_bf16_test6: +; GFX11-SDAG: ; %bb.0: +; GFX11-SDAG-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX11-SDAG-NEXT: v_dual_mov_b32 v3, 0xffffc100 :: v_dual_lshlrev_b32 v0, 16, v0 +; GFX11-SDAG-NEXT: v_cmp_eq_u32_e32 vcc_lo, v1, v2 +; GFX11-SDAG-NEXT: s_delay_alu instid0(VALU_DEP_2) | instskip(NEXT) | instid1(VALU_DEP_1) +; GFX11-SDAG-NEXT: v_cndmask_b32_e32 v1, 0x4040, v3, vcc_lo +; GFX11-SDAG-NEXT: v_lshlrev_b32_e32 v1, 16, v1 +; GFX11-SDAG-NEXT: s_delay_alu instid0(VALU_DEP_1) | instskip(NEXT) | instid1(VALU_DEP_1) +; GFX11-SDAG-NEXT: v_mul_f32_e32 v0, v0, v1 +; GFX11-SDAG-NEXT: v_bfe_u32 v1, v0, 16, 1 +; GFX11-SDAG-NEXT: v_or_b32_e32 v2, 0x400000, v0 +; GFX11-SDAG-NEXT: v_cmp_u_f32_e32 vcc_lo, v0, v0 +; GFX11-SDAG-NEXT: s_delay_alu instid0(VALU_DEP_3) | instskip(NEXT) | instid1(VALU_DEP_1) +; GFX11-SDAG-NEXT: v_add3_u32 v1, v1, v0, 0x7fff +; GFX11-SDAG-NEXT: v_cndmask_b32_e32 v0, v1, v2, vcc_lo +; 
GFX11-SDAG-NEXT: s_delay_alu instid0(VALU_DEP_1) +; GFX11-SDAG-NEXT: v_lshrrev_b32_e32 v0, 16, v0 +; GFX11-SDAG-NEXT: s_setpc_b64 s[30:31] +; +; GFX11-GISEL-LABEL: fmul_select_bf16_test6: +; GFX11-GISEL: ; %bb.0: +; GFX11-GISEL-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX11-GISEL-NEXT: v_mov_b32_e32 v3, 0x4040 +; GFX11-GISEL-NEXT: v_cmp_eq_u32_e32 vcc_lo, v1, v2 +; GFX11-GISEL-NEXT: s_delay_alu instid0(VALU_DEP_2) | instskip(NEXT) | instid1(VALU_DEP_1) +; GFX11-GISEL-NEXT: v_cndmask_b32_e64 v1, v3, 0xc100, vcc_lo +; GFX11-GISEL-NEXT: v_mul_f16_e32 v0, v0, v1 +; GFX11-GISEL-NEXT: s_setpc_b64 s[30:31] %bool = icmp eq i32 %bool.arg1, %bool.arg2 %y = select i1 %bool, bfloat -8.000000e+00, bfloat 3.000000e+00 %ldexp = fmul bfloat %x, %y @@ -2242,73 +3315,115 @@ define bfloat @fmul_select_bf16_test6(bfloat %x, i32 %bool.arg1, i32 %bool.arg2) } define bfloat @fmul_select_bf16_test7(bfloat %x, i32 %bool.arg1, i32 %bool.arg2) { -; GFX7-LABEL: fmul_select_bf16_test7: -; GFX7: ; %bb.0: -; GFX7-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX7-NEXT: v_mul_f32_e32 v0, 1.0, v0 -; GFX7-NEXT: v_mov_b32_e32 v3, 0x41000000 -; GFX7-NEXT: v_cmp_eq_u32_e32 vcc, v1, v2 -; GFX7-NEXT: v_cndmask_b32_e32 v1, -4.0, v3, vcc -; GFX7-NEXT: v_and_b32_e32 v0, 0xffff0000, v0 -; GFX7-NEXT: v_mul_f32_e32 v0, v0, v1 -; GFX7-NEXT: v_and_b32_e32 v0, 0xffff0000, v0 -; GFX7-NEXT: s_setpc_b64 s[30:31] -; -; GFX9-LABEL: fmul_select_bf16_test7: -; GFX9: ; %bb.0: -; GFX9-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX9-NEXT: v_mov_b32_e32 v3, 0xffffc080 -; GFX9-NEXT: v_mov_b32_e32 v4, 0x4100 -; GFX9-NEXT: v_cmp_eq_u32_e32 vcc, v1, v2 -; GFX9-NEXT: v_cndmask_b32_e32 v1, v3, v4, vcc -; GFX9-NEXT: v_lshlrev_b32_e32 v1, 16, v1 -; GFX9-NEXT: v_lshlrev_b32_e32 v0, 16, v0 -; GFX9-NEXT: v_mul_f32_e32 v0, v0, v1 -; GFX9-NEXT: v_bfe_u32 v1, v0, 16, 1 -; GFX9-NEXT: s_movk_i32 s4, 0x7fff -; GFX9-NEXT: v_add3_u32 v1, v1, v0, s4 -; GFX9-NEXT: v_or_b32_e32 v2, 0x400000, v0 -; GFX9-NEXT: v_cmp_u_f32_e32 
vcc, v0, v0 -; GFX9-NEXT: v_cndmask_b32_e32 v0, v1, v2, vcc -; GFX9-NEXT: v_lshrrev_b32_e32 v0, 16, v0 -; GFX9-NEXT: s_setpc_b64 s[30:31] -; -; GFX1030-LABEL: fmul_select_bf16_test7: -; GFX1030: ; %bb.0: -; GFX1030-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX1030-NEXT: v_mov_b32_e32 v3, 0x4100 -; GFX1030-NEXT: v_cmp_eq_u32_e32 vcc_lo, v1, v2 -; GFX1030-NEXT: v_lshlrev_b32_e32 v0, 16, v0 -; GFX1030-NEXT: v_cndmask_b32_e32 v1, 0xffffc080, v3, vcc_lo -; GFX1030-NEXT: v_lshlrev_b32_e32 v1, 16, v1 -; GFX1030-NEXT: v_mul_f32_e32 v0, v0, v1 -; GFX1030-NEXT: v_bfe_u32 v1, v0, 16, 1 -; GFX1030-NEXT: v_or_b32_e32 v2, 0x400000, v0 -; GFX1030-NEXT: v_cmp_u_f32_e32 vcc_lo, v0, v0 -; GFX1030-NEXT: v_add3_u32 v1, v1, v0, 0x7fff -; GFX1030-NEXT: v_cndmask_b32_e32 v0, v1, v2, vcc_lo -; GFX1030-NEXT: v_lshrrev_b32_e32 v0, 16, v0 -; GFX1030-NEXT: s_setpc_b64 s[30:31] -; -; GFX1100-LABEL: fmul_select_bf16_test7: -; GFX1100: ; %bb.0: -; GFX1100-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX1100-NEXT: v_dual_mov_b32 v3, 0x4100 :: v_dual_lshlrev_b32 v0, 16, v0 -; GFX1100-NEXT: v_cmp_eq_u32_e32 vcc_lo, v1, v2 -; GFX1100-NEXT: s_delay_alu instid0(VALU_DEP_2) | instskip(NEXT) | instid1(VALU_DEP_1) -; GFX1100-NEXT: v_cndmask_b32_e32 v1, 0xffffc080, v3, vcc_lo -; GFX1100-NEXT: v_lshlrev_b32_e32 v1, 16, v1 -; GFX1100-NEXT: s_delay_alu instid0(VALU_DEP_1) | instskip(NEXT) | instid1(VALU_DEP_1) -; GFX1100-NEXT: v_mul_f32_e32 v0, v0, v1 -; GFX1100-NEXT: v_bfe_u32 v1, v0, 16, 1 -; GFX1100-NEXT: v_or_b32_e32 v2, 0x400000, v0 -; GFX1100-NEXT: v_cmp_u_f32_e32 vcc_lo, v0, v0 -; GFX1100-NEXT: s_delay_alu instid0(VALU_DEP_3) | instskip(NEXT) | instid1(VALU_DEP_1) -; GFX1100-NEXT: v_add3_u32 v1, v1, v0, 0x7fff -; GFX1100-NEXT: v_cndmask_b32_e32 v0, v1, v2, vcc_lo -; GFX1100-NEXT: s_delay_alu instid0(VALU_DEP_1) -; GFX1100-NEXT: v_lshrrev_b32_e32 v0, 16, v0 -; GFX1100-NEXT: s_setpc_b64 s[30:31] +; GFX7-SDAG-LABEL: fmul_select_bf16_test7: +; GFX7-SDAG: ; %bb.0: +; GFX7-SDAG-NEXT: s_waitcnt 
vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX7-SDAG-NEXT: v_mul_f32_e32 v0, 1.0, v0 +; GFX7-SDAG-NEXT: v_mov_b32_e32 v3, 0x41000000 +; GFX7-SDAG-NEXT: v_cmp_eq_u32_e32 vcc, v1, v2 +; GFX7-SDAG-NEXT: v_cndmask_b32_e32 v1, -4.0, v3, vcc +; GFX7-SDAG-NEXT: v_and_b32_e32 v0, 0xffff0000, v0 +; GFX7-SDAG-NEXT: v_mul_f32_e32 v0, v0, v1 +; GFX7-SDAG-NEXT: v_and_b32_e32 v0, 0xffff0000, v0 +; GFX7-SDAG-NEXT: s_setpc_b64 s[30:31] +; +; GFX7-GISEL-LABEL: fmul_select_bf16_test7: +; GFX7-GISEL: ; %bb.0: +; GFX7-GISEL-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX7-GISEL-NEXT: v_mov_b32_e32 v3, 0x4100 +; GFX7-GISEL-NEXT: v_mov_b32_e32 v4, 0xc080 +; GFX7-GISEL-NEXT: v_cmp_eq_u32_e32 vcc, v1, v2 +; GFX7-GISEL-NEXT: v_cndmask_b32_e32 v1, v4, v3, vcc +; GFX7-GISEL-NEXT: v_cvt_f32_f16_e32 v0, v0 +; GFX7-GISEL-NEXT: v_cvt_f32_f16_e32 v1, v1 +; GFX7-GISEL-NEXT: v_mul_f32_e32 v0, v0, v1 +; GFX7-GISEL-NEXT: v_cvt_f16_f32_e32 v0, v0 +; GFX7-GISEL-NEXT: s_setpc_b64 s[30:31] +; +; GFX9-SDAG-LABEL: fmul_select_bf16_test7: +; GFX9-SDAG: ; %bb.0: +; GFX9-SDAG-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX9-SDAG-NEXT: v_mov_b32_e32 v3, 0xffffc080 +; GFX9-SDAG-NEXT: v_mov_b32_e32 v4, 0x4100 +; GFX9-SDAG-NEXT: v_cmp_eq_u32_e32 vcc, v1, v2 +; GFX9-SDAG-NEXT: v_cndmask_b32_e32 v1, v3, v4, vcc +; GFX9-SDAG-NEXT: v_lshlrev_b32_e32 v1, 16, v1 +; GFX9-SDAG-NEXT: v_lshlrev_b32_e32 v0, 16, v0 +; GFX9-SDAG-NEXT: v_mul_f32_e32 v0, v0, v1 +; GFX9-SDAG-NEXT: v_bfe_u32 v1, v0, 16, 1 +; GFX9-SDAG-NEXT: s_movk_i32 s4, 0x7fff +; GFX9-SDAG-NEXT: v_add3_u32 v1, v1, v0, s4 +; GFX9-SDAG-NEXT: v_or_b32_e32 v2, 0x400000, v0 +; GFX9-SDAG-NEXT: v_cmp_u_f32_e32 vcc, v0, v0 +; GFX9-SDAG-NEXT: v_cndmask_b32_e32 v0, v1, v2, vcc +; GFX9-SDAG-NEXT: v_lshrrev_b32_e32 v0, 16, v0 +; GFX9-SDAG-NEXT: s_setpc_b64 s[30:31] +; +; GFX9-GISEL-LABEL: fmul_select_bf16_test7: +; GFX9-GISEL: ; %bb.0: +; GFX9-GISEL-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX9-GISEL-NEXT: v_mov_b32_e32 v3, 0x4100 +; GFX9-GISEL-NEXT: v_mov_b32_e32 
v4, 0xc080 +; GFX9-GISEL-NEXT: v_cmp_eq_u32_e32 vcc, v1, v2 +; GFX9-GISEL-NEXT: v_cndmask_b32_e32 v1, v4, v3, vcc +; GFX9-GISEL-NEXT: v_mul_f16_e32 v0, v0, v1 +; GFX9-GISEL-NEXT: s_setpc_b64 s[30:31] +; +; GFX10-SDAG-LABEL: fmul_select_bf16_test7: +; GFX10-SDAG: ; %bb.0: +; GFX10-SDAG-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX10-SDAG-NEXT: v_mov_b32_e32 v3, 0x4100 +; GFX10-SDAG-NEXT: v_cmp_eq_u32_e32 vcc_lo, v1, v2 +; GFX10-SDAG-NEXT: v_lshlrev_b32_e32 v0, 16, v0 +; GFX10-SDAG-NEXT: v_cndmask_b32_e32 v1, 0xffffc080, v3, vcc_lo +; GFX10-SDAG-NEXT: v_lshlrev_b32_e32 v1, 16, v1 +; GFX10-SDAG-NEXT: v_mul_f32_e32 v0, v0, v1 +; GFX10-SDAG-NEXT: v_bfe_u32 v1, v0, 16, 1 +; GFX10-SDAG-NEXT: v_or_b32_e32 v2, 0x400000, v0 +; GFX10-SDAG-NEXT: v_cmp_u_f32_e32 vcc_lo, v0, v0 +; GFX10-SDAG-NEXT: v_add3_u32 v1, v1, v0, 0x7fff +; GFX10-SDAG-NEXT: v_cndmask_b32_e32 v0, v1, v2, vcc_lo +; GFX10-SDAG-NEXT: v_lshrrev_b32_e32 v0, 16, v0 +; GFX10-SDAG-NEXT: s_setpc_b64 s[30:31] +; +; GFX10-GISEL-LABEL: fmul_select_bf16_test7: +; GFX10-GISEL: ; %bb.0: +; GFX10-GISEL-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX10-GISEL-NEXT: v_mov_b32_e32 v3, 0xc080 +; GFX10-GISEL-NEXT: v_cmp_eq_u32_e32 vcc_lo, v1, v2 +; GFX10-GISEL-NEXT: v_cndmask_b32_e64 v1, v3, 0x4100, vcc_lo +; GFX10-GISEL-NEXT: v_mul_f16_e32 v0, v0, v1 +; GFX10-GISEL-NEXT: s_setpc_b64 s[30:31] +; +; GFX11-SDAG-LABEL: fmul_select_bf16_test7: +; GFX11-SDAG: ; %bb.0: +; GFX11-SDAG-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX11-SDAG-NEXT: v_dual_mov_b32 v3, 0x4100 :: v_dual_lshlrev_b32 v0, 16, v0 +; GFX11-SDAG-NEXT: v_cmp_eq_u32_e32 vcc_lo, v1, v2 +; GFX11-SDAG-NEXT: s_delay_alu instid0(VALU_DEP_2) | instskip(NEXT) | instid1(VALU_DEP_1) +; GFX11-SDAG-NEXT: v_cndmask_b32_e32 v1, 0xffffc080, v3, vcc_lo +; GFX11-SDAG-NEXT: v_lshlrev_b32_e32 v1, 16, v1 +; GFX11-SDAG-NEXT: s_delay_alu instid0(VALU_DEP_1) | instskip(NEXT) | instid1(VALU_DEP_1) +; GFX11-SDAG-NEXT: v_mul_f32_e32 v0, v0, v1 +; GFX11-SDAG-NEXT: v_bfe_u32 
v1, v0, 16, 1 +; GFX11-SDAG-NEXT: v_or_b32_e32 v2, 0x400000, v0 +; GFX11-SDAG-NEXT: v_cmp_u_f32_e32 vcc_lo, v0, v0 +; GFX11-SDAG-NEXT: s_delay_alu instid0(VALU_DEP_3) | instskip(NEXT) | instid1(VALU_DEP_1) +; GFX11-SDAG-NEXT: v_add3_u32 v1, v1, v0, 0x7fff +; GFX11-SDAG-NEXT: v_cndmask_b32_e32 v0, v1, v2, vcc_lo +; GFX11-SDAG-NEXT: s_delay_alu instid0(VALU_DEP_1) +; GFX11-SDAG-NEXT: v_lshrrev_b32_e32 v0, 16, v0 +; GFX11-SDAG-NEXT: s_setpc_b64 s[30:31] +; +; GFX11-GISEL-LABEL: fmul_select_bf16_test7: +; GFX11-GISEL: ; %bb.0: +; GFX11-GISEL-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX11-GISEL-NEXT: v_mov_b32_e32 v3, 0xc080 +; GFX11-GISEL-NEXT: v_cmp_eq_u32_e32 vcc_lo, v1, v2 +; GFX11-GISEL-NEXT: s_delay_alu instid0(VALU_DEP_2) | instskip(NEXT) | instid1(VALU_DEP_1) +; GFX11-GISEL-NEXT: v_cndmask_b32_e64 v1, v3, 0x4100, vcc_lo +; GFX11-GISEL-NEXT: v_mul_f16_e32 v0, v0, v1 +; GFX11-GISEL-NEXT: s_setpc_b64 s[30:31] %bool = icmp eq i32 %bool.arg1, %bool.arg2 %y = select i1 %bool, bfloat 8.000000e+00, bfloat -4.000000e+00 %ldexp = fmul bfloat %x, %y @@ -2316,73 +3431,111 @@ define bfloat @fmul_select_bf16_test7(bfloat %x, i32 %bool.arg1, i32 %bool.arg2) } define bfloat @fmul_select_bf16_test8(bfloat %x, i32 %bool.arg1, i32 %bool.arg2) { -; GFX7-LABEL: fmul_select_bf16_test8: -; GFX7: ; %bb.0: -; GFX7-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX7-NEXT: v_cmp_eq_u32_e32 vcc, v1, v2 -; GFX7-NEXT: v_mul_f32_e32 v0, 1.0, v0 -; GFX7-NEXT: v_cndmask_b32_e64 v1, 0, 1, vcc -; GFX7-NEXT: v_and_b32_e32 v0, 0xffff0000, v0 -; GFX7-NEXT: v_lshlrev_b32_e32 v1, 31, v1 -; GFX7-NEXT: v_mul_f32_e32 v0, v0, v1 -; GFX7-NEXT: v_and_b32_e32 v0, 0xffff0000, v0 -; GFX7-NEXT: s_setpc_b64 s[30:31] -; -; GFX9-LABEL: fmul_select_bf16_test8: -; GFX9: ; %bb.0: -; GFX9-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX9-NEXT: v_cmp_eq_u32_e32 vcc, v1, v2 -; GFX9-NEXT: v_cndmask_b32_e64 v1, 0, 1, vcc -; GFX9-NEXT: v_mov_b32_e32 v2, 15 -; GFX9-NEXT: v_lshlrev_b16_sdwa v1, v2, v1 
dst_sel:WORD_1 dst_unused:UNUSED_PAD src0_sel:DWORD src1_sel:DWORD -; GFX9-NEXT: v_lshlrev_b32_e32 v0, 16, v0 -; GFX9-NEXT: v_mul_f32_e32 v0, v0, v1 -; GFX9-NEXT: v_bfe_u32 v1, v0, 16, 1 -; GFX9-NEXT: s_movk_i32 s4, 0x7fff -; GFX9-NEXT: v_add3_u32 v1, v1, v0, s4 -; GFX9-NEXT: v_or_b32_e32 v2, 0x400000, v0 -; GFX9-NEXT: v_cmp_u_f32_e32 vcc, v0, v0 -; GFX9-NEXT: v_cndmask_b32_e32 v0, v1, v2, vcc -; GFX9-NEXT: v_lshrrev_b32_e32 v0, 16, v0 -; GFX9-NEXT: s_setpc_b64 s[30:31] -; -; GFX1030-LABEL: fmul_select_bf16_test8: -; GFX1030: ; %bb.0: -; GFX1030-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX1030-NEXT: v_cmp_eq_u32_e32 vcc_lo, v1, v2 -; GFX1030-NEXT: v_lshlrev_b32_e32 v0, 16, v0 -; GFX1030-NEXT: v_cndmask_b32_e64 v1, 0, 1, vcc_lo -; GFX1030-NEXT: v_lshlrev_b16 v1, 15, v1 -; GFX1030-NEXT: v_lshlrev_b32_e32 v1, 16, v1 -; GFX1030-NEXT: v_mul_f32_e32 v0, v0, v1 -; GFX1030-NEXT: v_bfe_u32 v1, v0, 16, 1 -; GFX1030-NEXT: v_or_b32_e32 v2, 0x400000, v0 -; GFX1030-NEXT: v_cmp_u_f32_e32 vcc_lo, v0, v0 -; GFX1030-NEXT: v_add3_u32 v1, v1, v0, 0x7fff -; GFX1030-NEXT: v_cndmask_b32_e32 v0, v1, v2, vcc_lo -; GFX1030-NEXT: v_lshrrev_b32_e32 v0, 16, v0 -; GFX1030-NEXT: s_setpc_b64 s[30:31] -; -; GFX1100-LABEL: fmul_select_bf16_test8: -; GFX1100: ; %bb.0: -; GFX1100-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX1100-NEXT: v_cmp_eq_u32_e32 vcc_lo, v1, v2 -; GFX1100-NEXT: v_lshlrev_b32_e32 v0, 16, v0 -; GFX1100-NEXT: v_cndmask_b32_e64 v1, 0, 1, vcc_lo -; GFX1100-NEXT: s_delay_alu instid0(VALU_DEP_1) | instskip(NEXT) | instid1(VALU_DEP_1) -; GFX1100-NEXT: v_lshlrev_b16 v1, 15, v1 -; GFX1100-NEXT: v_lshlrev_b32_e32 v1, 16, v1 -; GFX1100-NEXT: s_delay_alu instid0(VALU_DEP_1) | instskip(NEXT) | instid1(VALU_DEP_1) -; GFX1100-NEXT: v_mul_f32_e32 v0, v0, v1 -; GFX1100-NEXT: v_bfe_u32 v1, v0, 16, 1 -; GFX1100-NEXT: v_or_b32_e32 v2, 0x400000, v0 -; GFX1100-NEXT: v_cmp_u_f32_e32 vcc_lo, v0, v0 -; GFX1100-NEXT: s_delay_alu instid0(VALU_DEP_3) | instskip(NEXT) | instid1(VALU_DEP_1) -; 
GFX1100-NEXT: v_add3_u32 v1, v1, v0, 0x7fff -; GFX1100-NEXT: v_cndmask_b32_e32 v0, v1, v2, vcc_lo -; GFX1100-NEXT: s_delay_alu instid0(VALU_DEP_1) -; GFX1100-NEXT: v_lshrrev_b32_e32 v0, 16, v0 -; GFX1100-NEXT: s_setpc_b64 s[30:31] +; GFX7-SDAG-LABEL: fmul_select_bf16_test8: +; GFX7-SDAG: ; %bb.0: +; GFX7-SDAG-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX7-SDAG-NEXT: v_cmp_eq_u32_e32 vcc, v1, v2 +; GFX7-SDAG-NEXT: v_mul_f32_e32 v0, 1.0, v0 +; GFX7-SDAG-NEXT: v_cndmask_b32_e64 v1, 0, 1, vcc +; GFX7-SDAG-NEXT: v_and_b32_e32 v0, 0xffff0000, v0 +; GFX7-SDAG-NEXT: v_lshlrev_b32_e32 v1, 31, v1 +; GFX7-SDAG-NEXT: v_mul_f32_e32 v0, v0, v1 +; GFX7-SDAG-NEXT: v_and_b32_e32 v0, 0xffff0000, v0 +; GFX7-SDAG-NEXT: s_setpc_b64 s[30:31] +; +; GFX7-GISEL-LABEL: fmul_select_bf16_test8: +; GFX7-GISEL: ; %bb.0: +; GFX7-GISEL-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX7-GISEL-NEXT: v_mov_b32_e32 v3, 0x8000 +; GFX7-GISEL-NEXT: v_cmp_eq_u32_e32 vcc, v1, v2 +; GFX7-GISEL-NEXT: v_cndmask_b32_e32 v1, 0, v3, vcc +; GFX7-GISEL-NEXT: v_cvt_f32_f16_e32 v0, v0 +; GFX7-GISEL-NEXT: v_cvt_f32_f16_e32 v1, v1 +; GFX7-GISEL-NEXT: v_mul_f32_e32 v0, v0, v1 +; GFX7-GISEL-NEXT: v_cvt_f16_f32_e32 v0, v0 +; GFX7-GISEL-NEXT: s_setpc_b64 s[30:31] +; +; GFX9-SDAG-LABEL: fmul_select_bf16_test8: +; GFX9-SDAG: ; %bb.0: +; GFX9-SDAG-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX9-SDAG-NEXT: v_cmp_eq_u32_e32 vcc, v1, v2 +; GFX9-SDAG-NEXT: v_cndmask_b32_e64 v1, 0, 1, vcc +; GFX9-SDAG-NEXT: v_mov_b32_e32 v2, 15 +; GFX9-SDAG-NEXT: v_lshlrev_b16_sdwa v1, v2, v1 dst_sel:WORD_1 dst_unused:UNUSED_PAD src0_sel:DWORD src1_sel:DWORD +; GFX9-SDAG-NEXT: v_lshlrev_b32_e32 v0, 16, v0 +; GFX9-SDAG-NEXT: v_mul_f32_e32 v0, v0, v1 +; GFX9-SDAG-NEXT: v_bfe_u32 v1, v0, 16, 1 +; GFX9-SDAG-NEXT: s_movk_i32 s4, 0x7fff +; GFX9-SDAG-NEXT: v_add3_u32 v1, v1, v0, s4 +; GFX9-SDAG-NEXT: v_or_b32_e32 v2, 0x400000, v0 +; GFX9-SDAG-NEXT: v_cmp_u_f32_e32 vcc, v0, v0 +; GFX9-SDAG-NEXT: v_cndmask_b32_e32 v0, v1, v2, vcc +; 
GFX9-SDAG-NEXT: v_lshrrev_b32_e32 v0, 16, v0 +; GFX9-SDAG-NEXT: s_setpc_b64 s[30:31] +; +; GFX9-GISEL-LABEL: fmul_select_bf16_test8: +; GFX9-GISEL: ; %bb.0: +; GFX9-GISEL-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX9-GISEL-NEXT: v_mov_b32_e32 v3, 0x8000 +; GFX9-GISEL-NEXT: v_cmp_eq_u32_e32 vcc, v1, v2 +; GFX9-GISEL-NEXT: v_cndmask_b32_e32 v1, 0, v3, vcc +; GFX9-GISEL-NEXT: v_mul_f16_e32 v0, v0, v1 +; GFX9-GISEL-NEXT: s_setpc_b64 s[30:31] +; +; GFX10-SDAG-LABEL: fmul_select_bf16_test8: +; GFX10-SDAG: ; %bb.0: +; GFX10-SDAG-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX10-SDAG-NEXT: v_cmp_eq_u32_e32 vcc_lo, v1, v2 +; GFX10-SDAG-NEXT: v_lshlrev_b32_e32 v0, 16, v0 +; GFX10-SDAG-NEXT: v_cndmask_b32_e64 v1, 0, 1, vcc_lo +; GFX10-SDAG-NEXT: v_lshlrev_b16 v1, 15, v1 +; GFX10-SDAG-NEXT: v_lshlrev_b32_e32 v1, 16, v1 +; GFX10-SDAG-NEXT: v_mul_f32_e32 v0, v0, v1 +; GFX10-SDAG-NEXT: v_bfe_u32 v1, v0, 16, 1 +; GFX10-SDAG-NEXT: v_or_b32_e32 v2, 0x400000, v0 +; GFX10-SDAG-NEXT: v_cmp_u_f32_e32 vcc_lo, v0, v0 +; GFX10-SDAG-NEXT: v_add3_u32 v1, v1, v0, 0x7fff +; GFX10-SDAG-NEXT: v_cndmask_b32_e32 v0, v1, v2, vcc_lo +; GFX10-SDAG-NEXT: v_lshrrev_b32_e32 v0, 16, v0 +; GFX10-SDAG-NEXT: s_setpc_b64 s[30:31] +; +; GFX10-GISEL-LABEL: fmul_select_bf16_test8: +; GFX10-GISEL: ; %bb.0: +; GFX10-GISEL-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX10-GISEL-NEXT: v_cmp_eq_u32_e32 vcc_lo, v1, v2 +; GFX10-GISEL-NEXT: v_cndmask_b32_e64 v1, 0, 0x8000, vcc_lo +; GFX10-GISEL-NEXT: v_mul_f16_e32 v0, v0, v1 +; GFX10-GISEL-NEXT: s_setpc_b64 s[30:31] +; +; GFX11-SDAG-LABEL: fmul_select_bf16_test8: +; GFX11-SDAG: ; %bb.0: +; GFX11-SDAG-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX11-SDAG-NEXT: v_cmp_eq_u32_e32 vcc_lo, v1, v2 +; GFX11-SDAG-NEXT: v_lshlrev_b32_e32 v0, 16, v0 +; GFX11-SDAG-NEXT: v_cndmask_b32_e64 v1, 0, 1, vcc_lo +; GFX11-SDAG-NEXT: s_delay_alu instid0(VALU_DEP_1) | instskip(NEXT) | instid1(VALU_DEP_1) +; GFX11-SDAG-NEXT: v_lshlrev_b16 v1, 15, v1 +; GFX11-SDAG-NEXT: 
v_lshlrev_b32_e32 v1, 16, v1 +; GFX11-SDAG-NEXT: s_delay_alu instid0(VALU_DEP_1) | instskip(NEXT) | instid1(VALU_DEP_1) +; GFX11-SDAG-NEXT: v_mul_f32_e32 v0, v0, v1 +; GFX11-SDAG-NEXT: v_bfe_u32 v1, v0, 16, 1 +; GFX11-SDAG-NEXT: v_or_b32_e32 v2, 0x400000, v0 +; GFX11-SDAG-NEXT: v_cmp_u_f32_e32 vcc_lo, v0, v0 +; GFX11-SDAG-NEXT: s_delay_alu instid0(VALU_DEP_3) | instskip(NEXT) | instid1(VALU_DEP_1) +; GFX11-SDAG-NEXT: v_add3_u32 v1, v1, v0, 0x7fff +; GFX11-SDAG-NEXT: v_cndmask_b32_e32 v0, v1, v2, vcc_lo +; GFX11-SDAG-NEXT: s_delay_alu instid0(VALU_DEP_1) +; GFX11-SDAG-NEXT: v_lshrrev_b32_e32 v0, 16, v0 +; GFX11-SDAG-NEXT: s_setpc_b64 s[30:31] +; +; GFX11-GISEL-LABEL: fmul_select_bf16_test8: +; GFX11-GISEL: ; %bb.0: +; GFX11-GISEL-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX11-GISEL-NEXT: v_cmp_eq_u32_e32 vcc_lo, v1, v2 +; GFX11-GISEL-NEXT: v_cndmask_b32_e64 v1, 0, 0x8000, vcc_lo +; GFX11-GISEL-NEXT: s_delay_alu instid0(VALU_DEP_1) +; GFX11-GISEL-NEXT: v_mul_f16_e32 v0, v0, v1 +; GFX11-GISEL-NEXT: s_setpc_b64 s[30:31] %bool = icmp eq i32 %bool.arg1, %bool.arg2 %y = select i1 %bool, bfloat -0.000000e+00, bfloat 0.000000e+00 %ldexp = fmul bfloat %x, %y @@ -2390,74 +3543,121 @@ define bfloat @fmul_select_bf16_test8(bfloat %x, i32 %bool.arg1, i32 %bool.arg2) } define bfloat @fmul_select_bf16_test9(bfloat %x, i32 %bool.arg1, i32 %bool.arg2) { -; GFX7-LABEL: fmul_select_bf16_test9: -; GFX7: ; %bb.0: -; GFX7-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX7-NEXT: v_mul_f32_e32 v0, 1.0, v0 -; GFX7-NEXT: v_mov_b32_e32 v3, 0xc2000000 -; GFX7-NEXT: v_mov_b32_e32 v4, 0xc1800000 -; GFX7-NEXT: v_cmp_eq_u32_e32 vcc, v1, v2 -; GFX7-NEXT: v_cndmask_b32_e32 v1, v3, v4, vcc -; GFX7-NEXT: v_and_b32_e32 v0, 0xffff0000, v0 -; GFX7-NEXT: v_mul_f32_e32 v0, v0, v1 -; GFX7-NEXT: v_and_b32_e32 v0, 0xffff0000, v0 -; GFX7-NEXT: s_setpc_b64 s[30:31] -; -; GFX9-LABEL: fmul_select_bf16_test9: -; GFX9: ; %bb.0: -; GFX9-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX9-NEXT: 
v_mov_b32_e32 v3, 0xffffc200 -; GFX9-NEXT: v_mov_b32_e32 v4, 0xffffc180 -; GFX9-NEXT: v_cmp_eq_u32_e32 vcc, v1, v2 -; GFX9-NEXT: v_cndmask_b32_e32 v1, v3, v4, vcc -; GFX9-NEXT: v_lshlrev_b32_e32 v1, 16, v1 -; GFX9-NEXT: v_lshlrev_b32_e32 v0, 16, v0 -; GFX9-NEXT: v_mul_f32_e32 v0, v0, v1 -; GFX9-NEXT: v_bfe_u32 v1, v0, 16, 1 -; GFX9-NEXT: s_movk_i32 s4, 0x7fff -; GFX9-NEXT: v_add3_u32 v1, v1, v0, s4 -; GFX9-NEXT: v_or_b32_e32 v2, 0x400000, v0 -; GFX9-NEXT: v_cmp_u_f32_e32 vcc, v0, v0 -; GFX9-NEXT: v_cndmask_b32_e32 v0, v1, v2, vcc -; GFX9-NEXT: v_lshrrev_b32_e32 v0, 16, v0 -; GFX9-NEXT: s_setpc_b64 s[30:31] -; -; GFX1030-LABEL: fmul_select_bf16_test9: -; GFX1030: ; %bb.0: -; GFX1030-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX1030-NEXT: v_mov_b32_e32 v3, 0xffffc180 -; GFX1030-NEXT: v_cmp_eq_u32_e32 vcc_lo, v1, v2 -; GFX1030-NEXT: v_lshlrev_b32_e32 v0, 16, v0 -; GFX1030-NEXT: v_cndmask_b32_e32 v1, 0xffffc200, v3, vcc_lo -; GFX1030-NEXT: v_lshlrev_b32_e32 v1, 16, v1 -; GFX1030-NEXT: v_mul_f32_e32 v0, v0, v1 -; GFX1030-NEXT: v_bfe_u32 v1, v0, 16, 1 -; GFX1030-NEXT: v_or_b32_e32 v2, 0x400000, v0 -; GFX1030-NEXT: v_cmp_u_f32_e32 vcc_lo, v0, v0 -; GFX1030-NEXT: v_add3_u32 v1, v1, v0, 0x7fff -; GFX1030-NEXT: v_cndmask_b32_e32 v0, v1, v2, vcc_lo -; GFX1030-NEXT: v_lshrrev_b32_e32 v0, 16, v0 -; GFX1030-NEXT: s_setpc_b64 s[30:31] -; -; GFX1100-LABEL: fmul_select_bf16_test9: -; GFX1100: ; %bb.0: -; GFX1100-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX1100-NEXT: v_dual_mov_b32 v3, 0xffffc180 :: v_dual_lshlrev_b32 v0, 16, v0 -; GFX1100-NEXT: v_cmp_eq_u32_e32 vcc_lo, v1, v2 -; GFX1100-NEXT: s_delay_alu instid0(VALU_DEP_2) | instskip(NEXT) | instid1(VALU_DEP_1) -; GFX1100-NEXT: v_cndmask_b32_e32 v1, 0xffffc200, v3, vcc_lo -; GFX1100-NEXT: v_lshlrev_b32_e32 v1, 16, v1 -; GFX1100-NEXT: s_delay_alu instid0(VALU_DEP_1) | instskip(NEXT) | instid1(VALU_DEP_1) -; GFX1100-NEXT: v_mul_f32_e32 v0, v0, v1 -; GFX1100-NEXT: v_bfe_u32 v1, v0, 16, 1 -; GFX1100-NEXT: v_or_b32_e32 
v2, 0x400000, v0 -; GFX1100-NEXT: v_cmp_u_f32_e32 vcc_lo, v0, v0 -; GFX1100-NEXT: s_delay_alu instid0(VALU_DEP_3) | instskip(NEXT) | instid1(VALU_DEP_1) -; GFX1100-NEXT: v_add3_u32 v1, v1, v0, 0x7fff -; GFX1100-NEXT: v_cndmask_b32_e32 v0, v1, v2, vcc_lo -; GFX1100-NEXT: s_delay_alu instid0(VALU_DEP_1) -; GFX1100-NEXT: v_lshrrev_b32_e32 v0, 16, v0 -; GFX1100-NEXT: s_setpc_b64 s[30:31] +; GFX7-SDAG-LABEL: fmul_select_bf16_test9: +; GFX7-SDAG: ; %bb.0: +; GFX7-SDAG-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX7-SDAG-NEXT: v_mul_f32_e32 v0, 1.0, v0 +; GFX7-SDAG-NEXT: v_mov_b32_e32 v3, 0xc2000000 +; GFX7-SDAG-NEXT: v_mov_b32_e32 v4, 0xc1800000 +; GFX7-SDAG-NEXT: v_cmp_eq_u32_e32 vcc, v1, v2 +; GFX7-SDAG-NEXT: v_cndmask_b32_e32 v1, v3, v4, vcc +; GFX7-SDAG-NEXT: v_and_b32_e32 v0, 0xffff0000, v0 +; GFX7-SDAG-NEXT: v_mul_f32_e32 v0, v0, v1 +; GFX7-SDAG-NEXT: v_and_b32_e32 v0, 0xffff0000, v0 +; GFX7-SDAG-NEXT: s_setpc_b64 s[30:31] +; +; GFX7-GISEL-LABEL: fmul_select_bf16_test9: +; GFX7-GISEL: ; %bb.0: +; GFX7-GISEL-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX7-GISEL-NEXT: v_cvt_f32_f16_e64 v0, -v0 +; GFX7-GISEL-NEXT: v_cmp_eq_u32_e32 vcc, v1, v2 +; GFX7-GISEL-NEXT: v_cndmask_b32_e64 v1, 0, -1, vcc +; GFX7-GISEL-NEXT: v_add_i32_e32 v1, vcc, 5, v1 +; GFX7-GISEL-NEXT: v_ldexp_f32_e32 v0, v0, v1 +; GFX7-GISEL-NEXT: v_cvt_f16_f32_e32 v0, v0 +; GFX7-GISEL-NEXT: s_setpc_b64 s[30:31] +; +; GFX9-SDAG-LABEL: fmul_select_bf16_test9: +; GFX9-SDAG: ; %bb.0: +; GFX9-SDAG-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX9-SDAG-NEXT: v_mov_b32_e32 v3, 0xffffc200 +; GFX9-SDAG-NEXT: v_mov_b32_e32 v4, 0xffffc180 +; GFX9-SDAG-NEXT: v_cmp_eq_u32_e32 vcc, v1, v2 +; GFX9-SDAG-NEXT: v_cndmask_b32_e32 v1, v3, v4, vcc +; GFX9-SDAG-NEXT: v_lshlrev_b32_e32 v1, 16, v1 +; GFX9-SDAG-NEXT: v_lshlrev_b32_e32 v0, 16, v0 +; GFX9-SDAG-NEXT: v_mul_f32_e32 v0, v0, v1 +; GFX9-SDAG-NEXT: v_bfe_u32 v1, v0, 16, 1 +; GFX9-SDAG-NEXT: s_movk_i32 s4, 0x7fff +; GFX9-SDAG-NEXT: v_add3_u32 v1, v1, v0, s4 
+; GFX9-SDAG-NEXT: v_or_b32_e32 v2, 0x400000, v0 +; GFX9-SDAG-NEXT: v_cmp_u_f32_e32 vcc, v0, v0 +; GFX9-SDAG-NEXT: v_cndmask_b32_e32 v0, v1, v2, vcc +; GFX9-SDAG-NEXT: v_lshrrev_b32_e32 v0, 16, v0 +; GFX9-SDAG-NEXT: s_setpc_b64 s[30:31] +; +; GFX9-GISEL-LABEL: fmul_select_bf16_test9: +; GFX9-GISEL: ; %bb.0: +; GFX9-GISEL-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX9-GISEL-NEXT: v_cmp_eq_u32_e32 vcc, v1, v2 +; GFX9-GISEL-NEXT: v_cndmask_b32_e64 v1, 0, -1, vcc +; GFX9-GISEL-NEXT: v_add_u32_e32 v1, 5, v1 +; GFX9-GISEL-NEXT: v_mov_b32_e32 v2, 0xffff8000 +; GFX9-GISEL-NEXT: v_mov_b32_e32 v3, 0x7fff +; GFX9-GISEL-NEXT: v_med3_i32 v1, v1, v2, v3 +; GFX9-GISEL-NEXT: v_ldexp_f16_e64 v0, -v0, v1 +; GFX9-GISEL-NEXT: s_setpc_b64 s[30:31] +; +; GFX10-SDAG-LABEL: fmul_select_bf16_test9: +; GFX10-SDAG: ; %bb.0: +; GFX10-SDAG-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX10-SDAG-NEXT: v_mov_b32_e32 v3, 0xffffc180 +; GFX10-SDAG-NEXT: v_cmp_eq_u32_e32 vcc_lo, v1, v2 +; GFX10-SDAG-NEXT: v_lshlrev_b32_e32 v0, 16, v0 +; GFX10-SDAG-NEXT: v_cndmask_b32_e32 v1, 0xffffc200, v3, vcc_lo +; GFX10-SDAG-NEXT: v_lshlrev_b32_e32 v1, 16, v1 +; GFX10-SDAG-NEXT: v_mul_f32_e32 v0, v0, v1 +; GFX10-SDAG-NEXT: v_bfe_u32 v1, v0, 16, 1 +; GFX10-SDAG-NEXT: v_or_b32_e32 v2, 0x400000, v0 +; GFX10-SDAG-NEXT: v_cmp_u_f32_e32 vcc_lo, v0, v0 +; GFX10-SDAG-NEXT: v_add3_u32 v1, v1, v0, 0x7fff +; GFX10-SDAG-NEXT: v_cndmask_b32_e32 v0, v1, v2, vcc_lo +; GFX10-SDAG-NEXT: v_lshrrev_b32_e32 v0, 16, v0 +; GFX10-SDAG-NEXT: s_setpc_b64 s[30:31] +; +; GFX10-GISEL-LABEL: fmul_select_bf16_test9: +; GFX10-GISEL: ; %bb.0: +; GFX10-GISEL-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX10-GISEL-NEXT: v_cmp_eq_u32_e32 vcc_lo, v1, v2 +; GFX10-GISEL-NEXT: v_mov_b32_e32 v2, 0x7fff +; GFX10-GISEL-NEXT: v_cndmask_b32_e64 v1, 0, -1, vcc_lo +; GFX10-GISEL-NEXT: v_add_nc_u32_e32 v1, 5, v1 +; GFX10-GISEL-NEXT: v_med3_i32 v1, 0xffff8000, v1, v2 +; GFX10-GISEL-NEXT: v_ldexp_f16_e64 v0, -v0, v1 +; GFX10-GISEL-NEXT: 
s_setpc_b64 s[30:31] +; +; GFX11-SDAG-LABEL: fmul_select_bf16_test9: +; GFX11-SDAG: ; %bb.0: +; GFX11-SDAG-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX11-SDAG-NEXT: v_dual_mov_b32 v3, 0xffffc180 :: v_dual_lshlrev_b32 v0, 16, v0 +; GFX11-SDAG-NEXT: v_cmp_eq_u32_e32 vcc_lo, v1, v2 +; GFX11-SDAG-NEXT: s_delay_alu instid0(VALU_DEP_2) | instskip(NEXT) | instid1(VALU_DEP_1) +; GFX11-SDAG-NEXT: v_cndmask_b32_e32 v1, 0xffffc200, v3, vcc_lo +; GFX11-SDAG-NEXT: v_lshlrev_b32_e32 v1, 16, v1 +; GFX11-SDAG-NEXT: s_delay_alu instid0(VALU_DEP_1) | instskip(NEXT) | instid1(VALU_DEP_1) +; GFX11-SDAG-NEXT: v_mul_f32_e32 v0, v0, v1 +; GFX11-SDAG-NEXT: v_bfe_u32 v1, v0, 16, 1 +; GFX11-SDAG-NEXT: v_or_b32_e32 v2, 0x400000, v0 +; GFX11-SDAG-NEXT: v_cmp_u_f32_e32 vcc_lo, v0, v0 +; GFX11-SDAG-NEXT: s_delay_alu instid0(VALU_DEP_3) | instskip(NEXT) | instid1(VALU_DEP_1) +; GFX11-SDAG-NEXT: v_add3_u32 v1, v1, v0, 0x7fff +; GFX11-SDAG-NEXT: v_cndmask_b32_e32 v0, v1, v2, vcc_lo +; GFX11-SDAG-NEXT: s_delay_alu instid0(VALU_DEP_1) +; GFX11-SDAG-NEXT: v_lshrrev_b32_e32 v0, 16, v0 +; GFX11-SDAG-NEXT: s_setpc_b64 s[30:31] +; +; GFX11-GISEL-LABEL: fmul_select_bf16_test9: +; GFX11-GISEL: ; %bb.0: +; GFX11-GISEL-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX11-GISEL-NEXT: v_cmp_eq_u32_e32 vcc_lo, v1, v2 +; GFX11-GISEL-NEXT: v_mov_b32_e32 v2, 0x7fff +; GFX11-GISEL-NEXT: v_cndmask_b32_e64 v1, 0, -1, vcc_lo +; GFX11-GISEL-NEXT: s_delay_alu instid0(VALU_DEP_1) | instskip(NEXT) | instid1(VALU_DEP_1) +; GFX11-GISEL-NEXT: v_add_nc_u32_e32 v1, 5, v1 +; GFX11-GISEL-NEXT: v_med3_i32 v1, 0xffff8000, v1, v2 +; GFX11-GISEL-NEXT: s_delay_alu instid0(VALU_DEP_1) +; GFX11-GISEL-NEXT: v_ldexp_f16_e64 v0, -v0, v1 +; GFX11-GISEL-NEXT: s_setpc_b64 s[30:31] %bool = icmp eq i32 %bool.arg1, %bool.arg2 %y = select i1 %bool, bfloat -1.600000e+01, bfloat -3.200000e+01 %ldexp = fmul bfloat %x, %y @@ -2465,74 +3665,111 @@ define bfloat @fmul_select_bf16_test9(bfloat %x, i32 %bool.arg1, i32 %bool.arg2) } define 
bfloat @fmul_select_bf16_test10_sel_log2val_pos65_pos56(bfloat %x, i32 %bool.arg1, i32 %bool.arg2) { -; GFX7-LABEL: fmul_select_bf16_test10_sel_log2val_pos65_pos56: -; GFX7: ; %bb.0: -; GFX7-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX7-NEXT: v_mul_f32_e32 v0, 1.0, v0 -; GFX7-NEXT: v_mov_b32_e32 v3, 0xdb800000 -; GFX7-NEXT: v_bfrev_b32_e32 v4, 7 -; GFX7-NEXT: v_cmp_eq_u32_e32 vcc, v1, v2 -; GFX7-NEXT: v_cndmask_b32_e32 v1, v3, v4, vcc -; GFX7-NEXT: v_and_b32_e32 v0, 0xffff0000, v0 -; GFX7-NEXT: v_mul_f32_e32 v0, v0, v1 -; GFX7-NEXT: v_and_b32_e32 v0, 0xffff0000, v0 -; GFX7-NEXT: s_setpc_b64 s[30:31] -; -; GFX9-LABEL: fmul_select_bf16_test10_sel_log2val_pos65_pos56: -; GFX9: ; %bb.0: -; GFX9-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX9-NEXT: v_mov_b32_e32 v3, 0xffffdb80 -; GFX9-NEXT: v_mov_b32_e32 v4, 0xffffe000 -; GFX9-NEXT: v_cmp_eq_u32_e32 vcc, v1, v2 -; GFX9-NEXT: v_cndmask_b32_e32 v1, v3, v4, vcc -; GFX9-NEXT: v_lshlrev_b32_e32 v1, 16, v1 -; GFX9-NEXT: v_lshlrev_b32_e32 v0, 16, v0 -; GFX9-NEXT: v_mul_f32_e32 v0, v0, v1 -; GFX9-NEXT: v_bfe_u32 v1, v0, 16, 1 -; GFX9-NEXT: s_movk_i32 s4, 0x7fff -; GFX9-NEXT: v_add3_u32 v1, v1, v0, s4 -; GFX9-NEXT: v_or_b32_e32 v2, 0x400000, v0 -; GFX9-NEXT: v_cmp_u_f32_e32 vcc, v0, v0 -; GFX9-NEXT: v_cndmask_b32_e32 v0, v1, v2, vcc -; GFX9-NEXT: v_lshrrev_b32_e32 v0, 16, v0 -; GFX9-NEXT: s_setpc_b64 s[30:31] -; -; GFX1030-LABEL: fmul_select_bf16_test10_sel_log2val_pos65_pos56: -; GFX1030: ; %bb.0: -; GFX1030-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX1030-NEXT: v_mov_b32_e32 v3, 0xffffe000 -; GFX1030-NEXT: v_cmp_eq_u32_e32 vcc_lo, v1, v2 -; GFX1030-NEXT: v_lshlrev_b32_e32 v0, 16, v0 -; GFX1030-NEXT: v_cndmask_b32_e32 v1, 0xffffdb80, v3, vcc_lo -; GFX1030-NEXT: v_lshlrev_b32_e32 v1, 16, v1 -; GFX1030-NEXT: v_mul_f32_e32 v0, v0, v1 -; GFX1030-NEXT: v_bfe_u32 v1, v0, 16, 1 -; GFX1030-NEXT: v_or_b32_e32 v2, 0x400000, v0 -; GFX1030-NEXT: v_cmp_u_f32_e32 vcc_lo, v0, v0 -; GFX1030-NEXT: v_add3_u32 v1, v1, v0, 
0x7fff -; GFX1030-NEXT: v_cndmask_b32_e32 v0, v1, v2, vcc_lo -; GFX1030-NEXT: v_lshrrev_b32_e32 v0, 16, v0 -; GFX1030-NEXT: s_setpc_b64 s[30:31] -; -; GFX1100-LABEL: fmul_select_bf16_test10_sel_log2val_pos65_pos56: -; GFX1100: ; %bb.0: -; GFX1100-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX1100-NEXT: v_dual_mov_b32 v3, 0xffffe000 :: v_dual_lshlrev_b32 v0, 16, v0 -; GFX1100-NEXT: v_cmp_eq_u32_e32 vcc_lo, v1, v2 -; GFX1100-NEXT: s_delay_alu instid0(VALU_DEP_2) | instskip(NEXT) | instid1(VALU_DEP_1) -; GFX1100-NEXT: v_cndmask_b32_e32 v1, 0xffffdb80, v3, vcc_lo -; GFX1100-NEXT: v_lshlrev_b32_e32 v1, 16, v1 -; GFX1100-NEXT: s_delay_alu instid0(VALU_DEP_1) | instskip(NEXT) | instid1(VALU_DEP_1) -; GFX1100-NEXT: v_mul_f32_e32 v0, v0, v1 -; GFX1100-NEXT: v_bfe_u32 v1, v0, 16, 1 -; GFX1100-NEXT: v_or_b32_e32 v2, 0x400000, v0 -; GFX1100-NEXT: v_cmp_u_f32_e32 vcc_lo, v0, v0 -; GFX1100-NEXT: s_delay_alu instid0(VALU_DEP_3) | instskip(NEXT) | instid1(VALU_DEP_1) -; GFX1100-NEXT: v_add3_u32 v1, v1, v0, 0x7fff -; GFX1100-NEXT: v_cndmask_b32_e32 v0, v1, v2, vcc_lo -; GFX1100-NEXT: s_delay_alu instid0(VALU_DEP_1) -; GFX1100-NEXT: v_lshrrev_b32_e32 v0, 16, v0 -; GFX1100-NEXT: s_setpc_b64 s[30:31] +; GFX7-SDAG-LABEL: fmul_select_bf16_test10_sel_log2val_pos65_pos56: +; GFX7-SDAG: ; %bb.0: +; GFX7-SDAG-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX7-SDAG-NEXT: v_mul_f32_e32 v0, 1.0, v0 +; GFX7-SDAG-NEXT: v_mov_b32_e32 v3, 0xdb800000 +; GFX7-SDAG-NEXT: v_bfrev_b32_e32 v4, 7 +; GFX7-SDAG-NEXT: v_cmp_eq_u32_e32 vcc, v1, v2 +; GFX7-SDAG-NEXT: v_cndmask_b32_e32 v1, v3, v4, vcc +; GFX7-SDAG-NEXT: v_and_b32_e32 v0, 0xffff0000, v0 +; GFX7-SDAG-NEXT: v_mul_f32_e32 v0, v0, v1 +; GFX7-SDAG-NEXT: v_and_b32_e32 v0, 0xffff0000, v0 +; GFX7-SDAG-NEXT: s_setpc_b64 s[30:31] +; +; GFX7-GISEL-LABEL: fmul_select_bf16_test10_sel_log2val_pos65_pos56: +; GFX7-GISEL: ; %bb.0: +; GFX7-GISEL-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX7-GISEL-NEXT: v_cvt_f32_f16_e64 v0, -v0 +; 
GFX7-GISEL-NEXT: v_mov_b32_e32 v3, 0x41 +; GFX7-GISEL-NEXT: v_cmp_eq_u32_e32 vcc, v1, v2 +; GFX7-GISEL-NEXT: v_cndmask_b32_e32 v1, 56, v3, vcc +; GFX7-GISEL-NEXT: v_ldexp_f32_e32 v0, v0, v1 +; GFX7-GISEL-NEXT: v_cvt_f16_f32_e32 v0, v0 +; GFX7-GISEL-NEXT: s_setpc_b64 s[30:31] +; +; GFX9-SDAG-LABEL: fmul_select_bf16_test10_sel_log2val_pos65_pos56: +; GFX9-SDAG: ; %bb.0: +; GFX9-SDAG-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX9-SDAG-NEXT: v_mov_b32_e32 v3, 0xffffdb80 +; GFX9-SDAG-NEXT: v_mov_b32_e32 v4, 0xffffe000 +; GFX9-SDAG-NEXT: v_cmp_eq_u32_e32 vcc, v1, v2 +; GFX9-SDAG-NEXT: v_cndmask_b32_e32 v1, v3, v4, vcc +; GFX9-SDAG-NEXT: v_lshlrev_b32_e32 v1, 16, v1 +; GFX9-SDAG-NEXT: v_lshlrev_b32_e32 v0, 16, v0 +; GFX9-SDAG-NEXT: v_mul_f32_e32 v0, v0, v1 +; GFX9-SDAG-NEXT: v_bfe_u32 v1, v0, 16, 1 +; GFX9-SDAG-NEXT: s_movk_i32 s4, 0x7fff +; GFX9-SDAG-NEXT: v_add3_u32 v1, v1, v0, s4 +; GFX9-SDAG-NEXT: v_or_b32_e32 v2, 0x400000, v0 +; GFX9-SDAG-NEXT: v_cmp_u_f32_e32 vcc, v0, v0 +; GFX9-SDAG-NEXT: v_cndmask_b32_e32 v0, v1, v2, vcc +; GFX9-SDAG-NEXT: v_lshrrev_b32_e32 v0, 16, v0 +; GFX9-SDAG-NEXT: s_setpc_b64 s[30:31] +; +; GFX9-GISEL-LABEL: fmul_select_bf16_test10_sel_log2val_pos65_pos56: +; GFX9-GISEL: ; %bb.0: +; GFX9-GISEL-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX9-GISEL-NEXT: v_mov_b32_e32 v3, 0x41 +; GFX9-GISEL-NEXT: v_cmp_eq_u32_e32 vcc, v1, v2 +; GFX9-GISEL-NEXT: v_cndmask_b32_e32 v1, 56, v3, vcc +; GFX9-GISEL-NEXT: v_ldexp_f16_e64 v0, -v0, v1 +; GFX9-GISEL-NEXT: s_setpc_b64 s[30:31] +; +; GFX10-SDAG-LABEL: fmul_select_bf16_test10_sel_log2val_pos65_pos56: +; GFX10-SDAG: ; %bb.0: +; GFX10-SDAG-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX10-SDAG-NEXT: v_mov_b32_e32 v3, 0xffffe000 +; GFX10-SDAG-NEXT: v_cmp_eq_u32_e32 vcc_lo, v1, v2 +; GFX10-SDAG-NEXT: v_lshlrev_b32_e32 v0, 16, v0 +; GFX10-SDAG-NEXT: v_cndmask_b32_e32 v1, 0xffffdb80, v3, vcc_lo +; GFX10-SDAG-NEXT: v_lshlrev_b32_e32 v1, 16, v1 +; GFX10-SDAG-NEXT: v_mul_f32_e32 v0, v0, v1 +; 
GFX10-SDAG-NEXT: v_bfe_u32 v1, v0, 16, 1 +; GFX10-SDAG-NEXT: v_or_b32_e32 v2, 0x400000, v0 +; GFX10-SDAG-NEXT: v_cmp_u_f32_e32 vcc_lo, v0, v0 +; GFX10-SDAG-NEXT: v_add3_u32 v1, v1, v0, 0x7fff +; GFX10-SDAG-NEXT: v_cndmask_b32_e32 v0, v1, v2, vcc_lo +; GFX10-SDAG-NEXT: v_lshrrev_b32_e32 v0, 16, v0 +; GFX10-SDAG-NEXT: s_setpc_b64 s[30:31] +; +; GFX10-GISEL-LABEL: fmul_select_bf16_test10_sel_log2val_pos65_pos56: +; GFX10-GISEL: ; %bb.0: +; GFX10-GISEL-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX10-GISEL-NEXT: v_cmp_eq_u32_e32 vcc_lo, v1, v2 +; GFX10-GISEL-NEXT: v_cndmask_b32_e64 v1, 56, 0x41, vcc_lo +; GFX10-GISEL-NEXT: v_ldexp_f16_e64 v0, -v0, v1 +; GFX10-GISEL-NEXT: s_setpc_b64 s[30:31] +; +; GFX11-SDAG-LABEL: fmul_select_bf16_test10_sel_log2val_pos65_pos56: +; GFX11-SDAG: ; %bb.0: +; GFX11-SDAG-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX11-SDAG-NEXT: v_dual_mov_b32 v3, 0xffffe000 :: v_dual_lshlrev_b32 v0, 16, v0 +; GFX11-SDAG-NEXT: v_cmp_eq_u32_e32 vcc_lo, v1, v2 +; GFX11-SDAG-NEXT: s_delay_alu instid0(VALU_DEP_2) | instskip(NEXT) | instid1(VALU_DEP_1) +; GFX11-SDAG-NEXT: v_cndmask_b32_e32 v1, 0xffffdb80, v3, vcc_lo +; GFX11-SDAG-NEXT: v_lshlrev_b32_e32 v1, 16, v1 +; GFX11-SDAG-NEXT: s_delay_alu instid0(VALU_DEP_1) | instskip(NEXT) | instid1(VALU_DEP_1) +; GFX11-SDAG-NEXT: v_mul_f32_e32 v0, v0, v1 +; GFX11-SDAG-NEXT: v_bfe_u32 v1, v0, 16, 1 +; GFX11-SDAG-NEXT: v_or_b32_e32 v2, 0x400000, v0 +; GFX11-SDAG-NEXT: v_cmp_u_f32_e32 vcc_lo, v0, v0 +; GFX11-SDAG-NEXT: s_delay_alu instid0(VALU_DEP_3) | instskip(NEXT) | instid1(VALU_DEP_1) +; GFX11-SDAG-NEXT: v_add3_u32 v1, v1, v0, 0x7fff +; GFX11-SDAG-NEXT: v_cndmask_b32_e32 v0, v1, v2, vcc_lo +; GFX11-SDAG-NEXT: s_delay_alu instid0(VALU_DEP_1) +; GFX11-SDAG-NEXT: v_lshrrev_b32_e32 v0, 16, v0 +; GFX11-SDAG-NEXT: s_setpc_b64 s[30:31] +; +; GFX11-GISEL-LABEL: fmul_select_bf16_test10_sel_log2val_pos65_pos56: +; GFX11-GISEL: ; %bb.0: +; GFX11-GISEL-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; 
GFX11-GISEL-NEXT: v_cmp_eq_u32_e32 vcc_lo, v1, v2 +; GFX11-GISEL-NEXT: v_cndmask_b32_e64 v1, 56, 0x41, vcc_lo +; GFX11-GISEL-NEXT: s_delay_alu instid0(VALU_DEP_1) +; GFX11-GISEL-NEXT: v_ldexp_f16_e64 v0, -v0, v1 +; GFX11-GISEL-NEXT: s_setpc_b64 s[30:31] %bool = icmp eq i32 %bool.arg1, %bool.arg2 %y = select i1 %bool, bfloat 0xRE000, bfloat 0xRDB80 %ldexp = fmul bfloat %x, %y @@ -2540,74 +3777,111 @@ define bfloat @fmul_select_bf16_test10_sel_log2val_pos65_pos56(bfloat %x, i32 %b } define bfloat @fmul_select_bf16_test11_sel_log2val_neg22_pos25(bfloat %x, i32 %bool.arg1, i32 %bool.arg2) { -; GFX7-LABEL: fmul_select_bf16_test11_sel_log2val_neg22_pos25: -; GFX7: ; %bb.0: -; GFX7-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX7-NEXT: v_mul_f32_e32 v0, 1.0, v0 -; GFX7-NEXT: v_bfrev_b32_e32 v3, 50 -; GFX7-NEXT: v_mov_b32_e32 v4, 0x34800000 -; GFX7-NEXT: v_cmp_eq_u32_e32 vcc, v1, v2 -; GFX7-NEXT: v_cndmask_b32_e32 v1, v3, v4, vcc -; GFX7-NEXT: v_and_b32_e32 v0, 0xffff0000, v0 -; GFX7-NEXT: v_mul_f32_e32 v0, v0, v1 -; GFX7-NEXT: v_and_b32_e32 v0, 0xffff0000, v0 -; GFX7-NEXT: s_setpc_b64 s[30:31] -; -; GFX9-LABEL: fmul_select_bf16_test11_sel_log2val_neg22_pos25: -; GFX9: ; %bb.0: -; GFX9-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX9-NEXT: v_mov_b32_e32 v3, 0x4c00 -; GFX9-NEXT: v_mov_b32_e32 v4, 0x3480 -; GFX9-NEXT: v_cmp_eq_u32_e32 vcc, v1, v2 -; GFX9-NEXT: v_cndmask_b32_e32 v1, v3, v4, vcc -; GFX9-NEXT: v_lshlrev_b32_e32 v1, 16, v1 -; GFX9-NEXT: v_lshlrev_b32_e32 v0, 16, v0 -; GFX9-NEXT: v_mul_f32_e32 v0, v0, v1 -; GFX9-NEXT: v_bfe_u32 v1, v0, 16, 1 -; GFX9-NEXT: s_movk_i32 s4, 0x7fff -; GFX9-NEXT: v_add3_u32 v1, v1, v0, s4 -; GFX9-NEXT: v_or_b32_e32 v2, 0x400000, v0 -; GFX9-NEXT: v_cmp_u_f32_e32 vcc, v0, v0 -; GFX9-NEXT: v_cndmask_b32_e32 v0, v1, v2, vcc -; GFX9-NEXT: v_lshrrev_b32_e32 v0, 16, v0 -; GFX9-NEXT: s_setpc_b64 s[30:31] -; -; GFX1030-LABEL: fmul_select_bf16_test11_sel_log2val_neg22_pos25: -; GFX1030: ; %bb.0: -; GFX1030-NEXT: s_waitcnt vmcnt(0) 
expcnt(0) lgkmcnt(0) -; GFX1030-NEXT: v_mov_b32_e32 v3, 0x3480 -; GFX1030-NEXT: v_cmp_eq_u32_e32 vcc_lo, v1, v2 -; GFX1030-NEXT: v_lshlrev_b32_e32 v0, 16, v0 -; GFX1030-NEXT: v_cndmask_b32_e32 v1, 0x4c00, v3, vcc_lo -; GFX1030-NEXT: v_lshlrev_b32_e32 v1, 16, v1 -; GFX1030-NEXT: v_mul_f32_e32 v0, v0, v1 -; GFX1030-NEXT: v_bfe_u32 v1, v0, 16, 1 -; GFX1030-NEXT: v_or_b32_e32 v2, 0x400000, v0 -; GFX1030-NEXT: v_cmp_u_f32_e32 vcc_lo, v0, v0 -; GFX1030-NEXT: v_add3_u32 v1, v1, v0, 0x7fff -; GFX1030-NEXT: v_cndmask_b32_e32 v0, v1, v2, vcc_lo -; GFX1030-NEXT: v_lshrrev_b32_e32 v0, 16, v0 -; GFX1030-NEXT: s_setpc_b64 s[30:31] -; -; GFX1100-LABEL: fmul_select_bf16_test11_sel_log2val_neg22_pos25: -; GFX1100: ; %bb.0: -; GFX1100-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX1100-NEXT: v_dual_mov_b32 v3, 0x3480 :: v_dual_lshlrev_b32 v0, 16, v0 -; GFX1100-NEXT: v_cmp_eq_u32_e32 vcc_lo, v1, v2 -; GFX1100-NEXT: s_delay_alu instid0(VALU_DEP_2) | instskip(NEXT) | instid1(VALU_DEP_1) -; GFX1100-NEXT: v_cndmask_b32_e32 v1, 0x4c00, v3, vcc_lo -; GFX1100-NEXT: v_lshlrev_b32_e32 v1, 16, v1 -; GFX1100-NEXT: s_delay_alu instid0(VALU_DEP_1) | instskip(NEXT) | instid1(VALU_DEP_1) -; GFX1100-NEXT: v_mul_f32_e32 v0, v0, v1 -; GFX1100-NEXT: v_bfe_u32 v1, v0, 16, 1 -; GFX1100-NEXT: v_or_b32_e32 v2, 0x400000, v0 -; GFX1100-NEXT: v_cmp_u_f32_e32 vcc_lo, v0, v0 -; GFX1100-NEXT: s_delay_alu instid0(VALU_DEP_3) | instskip(NEXT) | instid1(VALU_DEP_1) -; GFX1100-NEXT: v_add3_u32 v1, v1, v0, 0x7fff -; GFX1100-NEXT: v_cndmask_b32_e32 v0, v1, v2, vcc_lo -; GFX1100-NEXT: s_delay_alu instid0(VALU_DEP_1) -; GFX1100-NEXT: v_lshrrev_b32_e32 v0, 16, v0 -; GFX1100-NEXT: s_setpc_b64 s[30:31] +; GFX7-SDAG-LABEL: fmul_select_bf16_test11_sel_log2val_neg22_pos25: +; GFX7-SDAG: ; %bb.0: +; GFX7-SDAG-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX7-SDAG-NEXT: v_mul_f32_e32 v0, 1.0, v0 +; GFX7-SDAG-NEXT: v_bfrev_b32_e32 v3, 50 +; GFX7-SDAG-NEXT: v_mov_b32_e32 v4, 0x34800000 +; GFX7-SDAG-NEXT: v_cmp_eq_u32_e32 
vcc, v1, v2 +; GFX7-SDAG-NEXT: v_cndmask_b32_e32 v1, v3, v4, vcc +; GFX7-SDAG-NEXT: v_and_b32_e32 v0, 0xffff0000, v0 +; GFX7-SDAG-NEXT: v_mul_f32_e32 v0, v0, v1 +; GFX7-SDAG-NEXT: v_and_b32_e32 v0, 0xffff0000, v0 +; GFX7-SDAG-NEXT: s_setpc_b64 s[30:31] +; +; GFX7-GISEL-LABEL: fmul_select_bf16_test11_sel_log2val_neg22_pos25: +; GFX7-GISEL: ; %bb.0: +; GFX7-GISEL-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX7-GISEL-NEXT: v_cvt_f32_f16_e32 v0, v0 +; GFX7-GISEL-NEXT: v_not_b32_e32 v3, 21 +; GFX7-GISEL-NEXT: v_cmp_eq_u32_e32 vcc, v1, v2 +; GFX7-GISEL-NEXT: v_cndmask_b32_e32 v1, 25, v3, vcc +; GFX7-GISEL-NEXT: v_ldexp_f32_e32 v0, v0, v1 +; GFX7-GISEL-NEXT: v_cvt_f16_f32_e32 v0, v0 +; GFX7-GISEL-NEXT: s_setpc_b64 s[30:31] +; +; GFX9-SDAG-LABEL: fmul_select_bf16_test11_sel_log2val_neg22_pos25: +; GFX9-SDAG: ; %bb.0: +; GFX9-SDAG-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX9-SDAG-NEXT: v_mov_b32_e32 v3, 0x4c00 +; GFX9-SDAG-NEXT: v_mov_b32_e32 v4, 0x3480 +; GFX9-SDAG-NEXT: v_cmp_eq_u32_e32 vcc, v1, v2 +; GFX9-SDAG-NEXT: v_cndmask_b32_e32 v1, v3, v4, vcc +; GFX9-SDAG-NEXT: v_lshlrev_b32_e32 v1, 16, v1 +; GFX9-SDAG-NEXT: v_lshlrev_b32_e32 v0, 16, v0 +; GFX9-SDAG-NEXT: v_mul_f32_e32 v0, v0, v1 +; GFX9-SDAG-NEXT: v_bfe_u32 v1, v0, 16, 1 +; GFX9-SDAG-NEXT: s_movk_i32 s4, 0x7fff +; GFX9-SDAG-NEXT: v_add3_u32 v1, v1, v0, s4 +; GFX9-SDAG-NEXT: v_or_b32_e32 v2, 0x400000, v0 +; GFX9-SDAG-NEXT: v_cmp_u_f32_e32 vcc, v0, v0 +; GFX9-SDAG-NEXT: v_cndmask_b32_e32 v0, v1, v2, vcc +; GFX9-SDAG-NEXT: v_lshrrev_b32_e32 v0, 16, v0 +; GFX9-SDAG-NEXT: s_setpc_b64 s[30:31] +; +; GFX9-GISEL-LABEL: fmul_select_bf16_test11_sel_log2val_neg22_pos25: +; GFX9-GISEL: ; %bb.0: +; GFX9-GISEL-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX9-GISEL-NEXT: v_not_b32_e32 v3, 21 +; GFX9-GISEL-NEXT: v_cmp_eq_u32_e32 vcc, v1, v2 +; GFX9-GISEL-NEXT: v_cndmask_b32_e32 v1, 25, v3, vcc +; GFX9-GISEL-NEXT: v_ldexp_f16_e32 v0, v0, v1 +; GFX9-GISEL-NEXT: s_setpc_b64 s[30:31] +; +; GFX10-SDAG-LABEL: 
fmul_select_bf16_test11_sel_log2val_neg22_pos25: +; GFX10-SDAG: ; %bb.0: +; GFX10-SDAG-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX10-SDAG-NEXT: v_mov_b32_e32 v3, 0x3480 +; GFX10-SDAG-NEXT: v_cmp_eq_u32_e32 vcc_lo, v1, v2 +; GFX10-SDAG-NEXT: v_lshlrev_b32_e32 v0, 16, v0 +; GFX10-SDAG-NEXT: v_cndmask_b32_e32 v1, 0x4c00, v3, vcc_lo +; GFX10-SDAG-NEXT: v_lshlrev_b32_e32 v1, 16, v1 +; GFX10-SDAG-NEXT: v_mul_f32_e32 v0, v0, v1 +; GFX10-SDAG-NEXT: v_bfe_u32 v1, v0, 16, 1 +; GFX10-SDAG-NEXT: v_or_b32_e32 v2, 0x400000, v0 +; GFX10-SDAG-NEXT: v_cmp_u_f32_e32 vcc_lo, v0, v0 +; GFX10-SDAG-NEXT: v_add3_u32 v1, v1, v0, 0x7fff +; GFX10-SDAG-NEXT: v_cndmask_b32_e32 v0, v1, v2, vcc_lo +; GFX10-SDAG-NEXT: v_lshrrev_b32_e32 v0, 16, v0 +; GFX10-SDAG-NEXT: s_setpc_b64 s[30:31] +; +; GFX10-GISEL-LABEL: fmul_select_bf16_test11_sel_log2val_neg22_pos25: +; GFX10-GISEL: ; %bb.0: +; GFX10-GISEL-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX10-GISEL-NEXT: v_cmp_eq_u32_e32 vcc_lo, v1, v2 +; GFX10-GISEL-NEXT: v_cndmask_b32_e64 v1, 25, 0xffffffea, vcc_lo +; GFX10-GISEL-NEXT: v_ldexp_f16_e32 v0, v0, v1 +; GFX10-GISEL-NEXT: s_setpc_b64 s[30:31] +; +; GFX11-SDAG-LABEL: fmul_select_bf16_test11_sel_log2val_neg22_pos25: +; GFX11-SDAG: ; %bb.0: +; GFX11-SDAG-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX11-SDAG-NEXT: v_dual_mov_b32 v3, 0x3480 :: v_dual_lshlrev_b32 v0, 16, v0 +; GFX11-SDAG-NEXT: v_cmp_eq_u32_e32 vcc_lo, v1, v2 +; GFX11-SDAG-NEXT: s_delay_alu instid0(VALU_DEP_2) | instskip(NEXT) | instid1(VALU_DEP_1) +; GFX11-SDAG-NEXT: v_cndmask_b32_e32 v1, 0x4c00, v3, vcc_lo +; GFX11-SDAG-NEXT: v_lshlrev_b32_e32 v1, 16, v1 +; GFX11-SDAG-NEXT: s_delay_alu instid0(VALU_DEP_1) | instskip(NEXT) | instid1(VALU_DEP_1) +; GFX11-SDAG-NEXT: v_mul_f32_e32 v0, v0, v1 +; GFX11-SDAG-NEXT: v_bfe_u32 v1, v0, 16, 1 +; GFX11-SDAG-NEXT: v_or_b32_e32 v2, 0x400000, v0 +; GFX11-SDAG-NEXT: v_cmp_u_f32_e32 vcc_lo, v0, v0 +; GFX11-SDAG-NEXT: s_delay_alu instid0(VALU_DEP_3) | instskip(NEXT) | 
instid1(VALU_DEP_1) +; GFX11-SDAG-NEXT: v_add3_u32 v1, v1, v0, 0x7fff +; GFX11-SDAG-NEXT: v_cndmask_b32_e32 v0, v1, v2, vcc_lo +; GFX11-SDAG-NEXT: s_delay_alu instid0(VALU_DEP_1) +; GFX11-SDAG-NEXT: v_lshrrev_b32_e32 v0, 16, v0 +; GFX11-SDAG-NEXT: s_setpc_b64 s[30:31] +; +; GFX11-GISEL-LABEL: fmul_select_bf16_test11_sel_log2val_neg22_pos25: +; GFX11-GISEL: ; %bb.0: +; GFX11-GISEL-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX11-GISEL-NEXT: v_cmp_eq_u32_e32 vcc_lo, v1, v2 +; GFX11-GISEL-NEXT: v_cndmask_b32_e64 v1, 25, 0xffffffea, vcc_lo +; GFX11-GISEL-NEXT: s_delay_alu instid0(VALU_DEP_1) +; GFX11-GISEL-NEXT: v_ldexp_f16_e32 v0, v0, v1 +; GFX11-GISEL-NEXT: s_setpc_b64 s[30:31] %bool = icmp eq i32 %bool.arg1, %bool.arg2 %y = select i1 %bool, bfloat 0xR3480, bfloat 0xR4C00 %ldexp = fmul bfloat %x, %y diff --git a/llvm/test/CodeGen/AMDGPU/fdiv_flags.f32.ll b/llvm/test/CodeGen/AMDGPU/fdiv_flags.f32.ll index ebfb5e9ccaa35..a324ba35b155f 100644 --- a/llvm/test/CodeGen/AMDGPU/fdiv_flags.f32.ll +++ b/llvm/test/CodeGen/AMDGPU/fdiv_flags.f32.ll @@ -1625,14 +1625,12 @@ define float @v_recip_sqrt_f32_ulp25_contract(float %x) { ; CODEGEN-IEEE-GISEL: ; %bb.0: ; CODEGEN-IEEE-GISEL-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) ; CODEGEN-IEEE-GISEL-NEXT: v_mov_b32_e32 v1, 0x800000 -; CODEGEN-IEEE-GISEL-NEXT: v_mov_b32_e32 v2, 0x4b800000 ; CODEGEN-IEEE-GISEL-NEXT: v_cmp_lt_f32_e32 vcc, v0, v1 -; CODEGEN-IEEE-GISEL-NEXT: v_cndmask_b32_e32 v1, 1.0, v2, vcc -; CODEGEN-IEEE-GISEL-NEXT: v_mul_f32_e32 v0, v0, v1 +; CODEGEN-IEEE-GISEL-NEXT: v_cndmask_b32_e64 v1, 0, 24, vcc +; CODEGEN-IEEE-GISEL-NEXT: v_ldexp_f32_e32 v0, v0, v1 ; CODEGEN-IEEE-GISEL-NEXT: v_rsq_f32_e32 v0, v0 -; CODEGEN-IEEE-GISEL-NEXT: v_mov_b32_e32 v1, 0x45800000 -; CODEGEN-IEEE-GISEL-NEXT: v_cndmask_b32_e32 v1, 1.0, v1, vcc -; CODEGEN-IEEE-GISEL-NEXT: v_mul_f32_e32 v0, v0, v1 +; CODEGEN-IEEE-GISEL-NEXT: v_cndmask_b32_e64 v1, 0, 12, vcc +; CODEGEN-IEEE-GISEL-NEXT: v_ldexp_f32_e32 v0, v0, v1 ; CODEGEN-IEEE-GISEL-NEXT: 
s_setpc_b64 s[30:31] ; ; IR-IEEE-SDAG-LABEL: v_recip_sqrt_f32_ulp25_contract: diff --git a/llvm/test/CodeGen/AMDGPU/fmul-to-ldexp.ll b/llvm/test/CodeGen/AMDGPU/fmul-to-ldexp.ll index 104e157e9e15a..9ae60f99d5e09 100644 --- a/llvm/test/CodeGen/AMDGPU/fmul-to-ldexp.ll +++ b/llvm/test/CodeGen/AMDGPU/fmul-to-ldexp.ll @@ -3307,488 +3307,458 @@ define amdgpu_ps i32 @s_mul_32_f16(half inreg %x, half inreg %y) { ; -------------------------------------------------------------------- define float @v_mul_f32_select_64_1(i32 %arg, float %x) { -; GFX9-SDAG-LABEL: v_mul_f32_select_64_1: -; GFX9-SDAG: ; %bb.0: -; GFX9-SDAG-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX9-SDAG-NEXT: v_cmp_eq_u32_e32 vcc, 0, v0 -; GFX9-SDAG-NEXT: v_cndmask_b32_e64 v0, 0, 6, vcc -; GFX9-SDAG-NEXT: v_ldexp_f32 v0, v1, v0 -; GFX9-SDAG-NEXT: s_setpc_b64 s[30:31] -; -; GFX9-GISEL-LABEL: v_mul_f32_select_64_1: -; GFX9-GISEL: ; %bb.0: -; GFX9-GISEL-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX9-GISEL-NEXT: v_mov_b32_e32 v2, 0x42800000 -; GFX9-GISEL-NEXT: v_cmp_eq_u32_e32 vcc, 0, v0 -; GFX9-GISEL-NEXT: v_cndmask_b32_e32 v0, 1.0, v2, vcc -; GFX9-GISEL-NEXT: v_mul_f32_e32 v0, v1, v0 -; GFX9-GISEL-NEXT: s_setpc_b64 s[30:31] +; GFX9-LABEL: v_mul_f32_select_64_1: +; GFX9: ; %bb.0: +; GFX9-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX9-NEXT: v_cmp_eq_u32_e32 vcc, 0, v0 +; GFX9-NEXT: v_cndmask_b32_e64 v0, 0, 6, vcc +; GFX9-NEXT: v_ldexp_f32 v0, v1, v0 +; GFX9-NEXT: s_setpc_b64 s[30:31] ; -; GFX10-SDAG-LABEL: v_mul_f32_select_64_1: -; GFX10-SDAG: ; %bb.0: -; GFX10-SDAG-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX10-SDAG-NEXT: v_cmp_eq_u32_e32 vcc_lo, 0, v0 -; GFX10-SDAG-NEXT: v_cndmask_b32_e64 v0, 0, 6, vcc_lo -; GFX10-SDAG-NEXT: v_ldexp_f32 v0, v1, v0 -; GFX10-SDAG-NEXT: s_setpc_b64 s[30:31] +; GFX1011-LABEL: v_mul_f32_select_64_1: +; GFX1011: ; %bb.0: +; GFX1011-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX1011-NEXT: v_cmp_eq_u32_e32 vcc_lo, 0, v0 +; GFX1011-NEXT: 
v_cndmask_b32_e64 v0, 0, 6, vcc_lo +; GFX1011-NEXT: v_ldexp_f32 v0, v1, v0 +; GFX1011-NEXT: s_setpc_b64 s[30:31] + %cond = icmp eq i32 %arg, 0 + %select.pow2 = select i1 %cond, float 64.0, float 1.0 + %mul = fmul float %x, %select.pow2 + ret float %mul +} + +define float @v_mul_f32_select_1_64(i32 %arg, float %x) { +; GFX9-LABEL: v_mul_f32_select_1_64: +; GFX9: ; %bb.0: +; GFX9-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX9-NEXT: v_cmp_eq_u32_e32 vcc, 0, v0 +; GFX9-NEXT: v_cndmask_b32_e64 v0, 6, 0, vcc +; GFX9-NEXT: v_ldexp_f32 v0, v1, v0 +; GFX9-NEXT: s_setpc_b64 s[30:31] ; -; GFX10-GISEL-LABEL: v_mul_f32_select_64_1: -; GFX10-GISEL: ; %bb.0: -; GFX10-GISEL-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX10-GISEL-NEXT: v_cmp_eq_u32_e32 vcc_lo, 0, v0 -; GFX10-GISEL-NEXT: v_cndmask_b32_e64 v0, 1.0, 0x42800000, vcc_lo -; GFX10-GISEL-NEXT: v_mul_f32_e32 v0, v1, v0 -; GFX10-GISEL-NEXT: s_setpc_b64 s[30:31] +; GFX1011-LABEL: v_mul_f32_select_1_64: +; GFX1011: ; %bb.0: +; GFX1011-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX1011-NEXT: v_cmp_eq_u32_e32 vcc_lo, 0, v0 +; GFX1011-NEXT: v_cndmask_b32_e64 v0, 6, 0, vcc_lo +; GFX1011-NEXT: v_ldexp_f32 v0, v1, v0 +; GFX1011-NEXT: s_setpc_b64 s[30:31] + %cond = icmp eq i32 %arg, 0 + %select.pow2 = select i1 %cond, float 1.0, float 64.0 + %mul = fmul float %x, %select.pow2 + ret float %mul +} + +define float @v_mul_f32_select_n1_n64(i32 %arg, float %x) { +; GFX9-LABEL: v_mul_f32_select_n1_n64: +; GFX9: ; %bb.0: +; GFX9-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX9-NEXT: v_cmp_eq_u32_e32 vcc, 0, v0 +; GFX9-NEXT: v_cndmask_b32_e64 v0, 6, 0, vcc +; GFX9-NEXT: v_ldexp_f32 v0, -v1, v0 +; GFX9-NEXT: s_setpc_b64 s[30:31] ; -; GFX11-SDAG-LABEL: v_mul_f32_select_64_1: -; GFX11-SDAG: ; %bb.0: -; GFX11-SDAG-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX11-SDAG-NEXT: v_cmp_eq_u32_e32 vcc_lo, 0, v0 -; GFX11-SDAG-NEXT: v_cndmask_b32_e64 v0, 0, 6, vcc_lo -; GFX11-SDAG-NEXT: v_ldexp_f32 v0, v1, v0 -; 
GFX11-SDAG-NEXT: s_setpc_b64 s[30:31] +; GFX1011-LABEL: v_mul_f32_select_n1_n64: +; GFX1011: ; %bb.0: +; GFX1011-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX1011-NEXT: v_cmp_eq_u32_e32 vcc_lo, 0, v0 +; GFX1011-NEXT: v_cndmask_b32_e64 v0, 6, 0, vcc_lo +; GFX1011-NEXT: v_ldexp_f32 v0, -v1, v0 +; GFX1011-NEXT: s_setpc_b64 s[30:31] + %cond = icmp eq i32 %arg, 0 + %select.pow2 = select i1 %cond, float -1.0, float -64.0 + %mul = fmul float %x, %select.pow2 + ret float %mul +} + +define float @v_mul_f32_select_n64_n1(i32 %arg, float %x) { +; GFX9-LABEL: v_mul_f32_select_n64_n1: +; GFX9: ; %bb.0: +; GFX9-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX9-NEXT: v_cmp_eq_u32_e32 vcc, 0, v0 +; GFX9-NEXT: v_cndmask_b32_e64 v0, 0, 6, vcc +; GFX9-NEXT: v_ldexp_f32 v0, -v1, v0 +; GFX9-NEXT: s_setpc_b64 s[30:31] ; -; GFX11-GISEL-LABEL: v_mul_f32_select_64_1: -; GFX11-GISEL: ; %bb.0: -; GFX11-GISEL-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX11-GISEL-NEXT: v_cmp_eq_u32_e32 vcc_lo, 0, v0 -; GFX11-GISEL-NEXT: v_cndmask_b32_e64 v0, 1.0, 0x42800000, vcc_lo -; GFX11-GISEL-NEXT: v_mul_f32_e32 v0, v1, v0 -; GFX11-GISEL-NEXT: s_setpc_b64 s[30:31] +; GFX1011-LABEL: v_mul_f32_select_n64_n1: +; GFX1011: ; %bb.0: +; GFX1011-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX1011-NEXT: v_cmp_eq_u32_e32 vcc_lo, 0, v0 +; GFX1011-NEXT: v_cndmask_b32_e64 v0, 0, 6, vcc_lo +; GFX1011-NEXT: v_ldexp_f32 v0, -v1, v0 +; GFX1011-NEXT: s_setpc_b64 s[30:31] %cond = icmp eq i32 %arg, 0 - %select.pow2 = select i1 %cond, float 64.0, float 1.0 + %select.pow2 = select i1 %cond, float -64.0, float -1.0 %mul = fmul float %x, %select.pow2 ret float %mul } -define float @v_mul_f32_select_1_64(i32 %arg, float %x) { -; GFX9-SDAG-LABEL: v_mul_f32_select_1_64: +define float @v_mul_f32_select_128_64(i32 %arg, float %x) { +; GFX9-SDAG-LABEL: v_mul_f32_select_128_64: ; GFX9-SDAG: ; %bb.0: ; GFX9-SDAG-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) ; GFX9-SDAG-NEXT: v_cmp_eq_u32_e32 vcc, 0, v0 -; 
GFX9-SDAG-NEXT: v_cndmask_b32_e64 v0, 6, 0, vcc +; GFX9-SDAG-NEXT: v_cndmask_b32_e64 v0, 6, 7, vcc ; GFX9-SDAG-NEXT: v_ldexp_f32 v0, v1, v0 ; GFX9-SDAG-NEXT: s_setpc_b64 s[30:31] ; -; GFX9-GISEL-LABEL: v_mul_f32_select_1_64: +; GFX9-GISEL-LABEL: v_mul_f32_select_128_64: ; GFX9-GISEL: ; %bb.0: ; GFX9-GISEL-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX9-GISEL-NEXT: v_mov_b32_e32 v2, 0x42800000 ; GFX9-GISEL-NEXT: v_cmp_eq_u32_e32 vcc, 0, v0 -; GFX9-GISEL-NEXT: v_cndmask_b32_e64 v0, v2, 1.0, vcc -; GFX9-GISEL-NEXT: v_mul_f32_e32 v0, v1, v0 +; GFX9-GISEL-NEXT: v_cndmask_b32_e64 v0, 0, 1, vcc +; GFX9-GISEL-NEXT: v_add_u32_e32 v0, 6, v0 +; GFX9-GISEL-NEXT: v_ldexp_f32 v0, v1, v0 ; GFX9-GISEL-NEXT: s_setpc_b64 s[30:31] ; -; GFX10-SDAG-LABEL: v_mul_f32_select_1_64: +; GFX10-SDAG-LABEL: v_mul_f32_select_128_64: ; GFX10-SDAG: ; %bb.0: ; GFX10-SDAG-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) ; GFX10-SDAG-NEXT: v_cmp_eq_u32_e32 vcc_lo, 0, v0 -; GFX10-SDAG-NEXT: v_cndmask_b32_e64 v0, 6, 0, vcc_lo +; GFX10-SDAG-NEXT: v_cndmask_b32_e64 v0, 6, 7, vcc_lo ; GFX10-SDAG-NEXT: v_ldexp_f32 v0, v1, v0 ; GFX10-SDAG-NEXT: s_setpc_b64 s[30:31] ; -; GFX10-GISEL-LABEL: v_mul_f32_select_1_64: +; GFX10-GISEL-LABEL: v_mul_f32_select_128_64: ; GFX10-GISEL: ; %bb.0: ; GFX10-GISEL-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) ; GFX10-GISEL-NEXT: v_cmp_eq_u32_e32 vcc_lo, 0, v0 -; GFX10-GISEL-NEXT: v_cndmask_b32_e64 v0, 0x42800000, 1.0, vcc_lo -; GFX10-GISEL-NEXT: v_mul_f32_e32 v0, v1, v0 +; GFX10-GISEL-NEXT: v_cndmask_b32_e64 v0, 0, 1, vcc_lo +; GFX10-GISEL-NEXT: v_add_nc_u32_e32 v0, 6, v0 +; GFX10-GISEL-NEXT: v_ldexp_f32 v0, v1, v0 ; GFX10-GISEL-NEXT: s_setpc_b64 s[30:31] ; -; GFX11-SDAG-LABEL: v_mul_f32_select_1_64: +; GFX11-SDAG-LABEL: v_mul_f32_select_128_64: ; GFX11-SDAG: ; %bb.0: ; GFX11-SDAG-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) ; GFX11-SDAG-NEXT: v_cmp_eq_u32_e32 vcc_lo, 0, v0 -; GFX11-SDAG-NEXT: v_cndmask_b32_e64 v0, 6, 0, vcc_lo +; GFX11-SDAG-NEXT: v_cndmask_b32_e64 v0, 6, 
7, vcc_lo ; GFX11-SDAG-NEXT: v_ldexp_f32 v0, v1, v0 ; GFX11-SDAG-NEXT: s_setpc_b64 s[30:31] ; -; GFX11-GISEL-LABEL: v_mul_f32_select_1_64: +; GFX11-GISEL-LABEL: v_mul_f32_select_128_64: ; GFX11-GISEL: ; %bb.0: ; GFX11-GISEL-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) ; GFX11-GISEL-NEXT: v_cmp_eq_u32_e32 vcc_lo, 0, v0 -; GFX11-GISEL-NEXT: v_cndmask_b32_e64 v0, 0x42800000, 1.0, vcc_lo -; GFX11-GISEL-NEXT: v_mul_f32_e32 v0, v1, v0 +; GFX11-GISEL-NEXT: v_cndmask_b32_e64 v0, 0, 1, vcc_lo +; GFX11-GISEL-NEXT: v_add_nc_u32_e32 v0, 6, v0 +; GFX11-GISEL-NEXT: v_ldexp_f32 v0, v1, v0 ; GFX11-GISEL-NEXT: s_setpc_b64 s[30:31] %cond = icmp eq i32 %arg, 0 - %select.pow2 = select i1 %cond, float 1.0, float 64.0 + %select.pow2 = select i1 %cond, float 128.0, float 64.0 %mul = fmul float %x, %select.pow2 ret float %mul } -define float @v_mul_f32_select_n1_n64(i32 %arg, float %x) { -; GFX9-SDAG-LABEL: v_mul_f32_select_n1_n64: +define float @v_mul_f32_select_n128_n64(i32 %arg, float %x) { +; GFX9-SDAG-LABEL: v_mul_f32_select_n128_n64: ; GFX9-SDAG: ; %bb.0: ; GFX9-SDAG-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) ; GFX9-SDAG-NEXT: v_cmp_eq_u32_e32 vcc, 0, v0 -; GFX9-SDAG-NEXT: v_cndmask_b32_e64 v0, 6, 0, vcc +; GFX9-SDAG-NEXT: v_cndmask_b32_e64 v0, 6, 7, vcc ; GFX9-SDAG-NEXT: v_ldexp_f32 v0, -v1, v0 ; GFX9-SDAG-NEXT: s_setpc_b64 s[30:31] ; -; GFX9-GISEL-LABEL: v_mul_f32_select_n1_n64: +; GFX9-GISEL-LABEL: v_mul_f32_select_n128_n64: ; GFX9-GISEL: ; %bb.0: ; GFX9-GISEL-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX9-GISEL-NEXT: v_mov_b32_e32 v2, 0xc2800000 ; GFX9-GISEL-NEXT: v_cmp_eq_u32_e32 vcc, 0, v0 -; GFX9-GISEL-NEXT: v_cndmask_b32_e64 v0, v2, -1.0, vcc -; GFX9-GISEL-NEXT: v_mul_f32_e32 v0, v1, v0 +; GFX9-GISEL-NEXT: v_cndmask_b32_e64 v0, 0, 1, vcc +; GFX9-GISEL-NEXT: v_add_u32_e32 v0, 6, v0 +; GFX9-GISEL-NEXT: v_ldexp_f32 v0, -v1, v0 ; GFX9-GISEL-NEXT: s_setpc_b64 s[30:31] ; -; GFX10-SDAG-LABEL: v_mul_f32_select_n1_n64: +; GFX10-SDAG-LABEL: v_mul_f32_select_n128_n64: ; 
GFX10-SDAG: ; %bb.0: ; GFX10-SDAG-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) ; GFX10-SDAG-NEXT: v_cmp_eq_u32_e32 vcc_lo, 0, v0 -; GFX10-SDAG-NEXT: v_cndmask_b32_e64 v0, 6, 0, vcc_lo +; GFX10-SDAG-NEXT: v_cndmask_b32_e64 v0, 6, 7, vcc_lo ; GFX10-SDAG-NEXT: v_ldexp_f32 v0, -v1, v0 ; GFX10-SDAG-NEXT: s_setpc_b64 s[30:31] ; -; GFX10-GISEL-LABEL: v_mul_f32_select_n1_n64: +; GFX10-GISEL-LABEL: v_mul_f32_select_n128_n64: ; GFX10-GISEL: ; %bb.0: ; GFX10-GISEL-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) ; GFX10-GISEL-NEXT: v_cmp_eq_u32_e32 vcc_lo, 0, v0 -; GFX10-GISEL-NEXT: v_cndmask_b32_e64 v0, 0xc2800000, -1.0, vcc_lo -; GFX10-GISEL-NEXT: v_mul_f32_e32 v0, v1, v0 +; GFX10-GISEL-NEXT: v_cndmask_b32_e64 v0, 0, 1, vcc_lo +; GFX10-GISEL-NEXT: v_add_nc_u32_e32 v0, 6, v0 +; GFX10-GISEL-NEXT: v_ldexp_f32 v0, -v1, v0 ; GFX10-GISEL-NEXT: s_setpc_b64 s[30:31] ; -; GFX11-SDAG-LABEL: v_mul_f32_select_n1_n64: +; GFX11-SDAG-LABEL: v_mul_f32_select_n128_n64: ; GFX11-SDAG: ; %bb.0: ; GFX11-SDAG-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) ; GFX11-SDAG-NEXT: v_cmp_eq_u32_e32 vcc_lo, 0, v0 -; GFX11-SDAG-NEXT: v_cndmask_b32_e64 v0, 6, 0, vcc_lo +; GFX11-SDAG-NEXT: v_cndmask_b32_e64 v0, 6, 7, vcc_lo ; GFX11-SDAG-NEXT: v_ldexp_f32 v0, -v1, v0 ; GFX11-SDAG-NEXT: s_setpc_b64 s[30:31] ; -; GFX11-GISEL-LABEL: v_mul_f32_select_n1_n64: +; GFX11-GISEL-LABEL: v_mul_f32_select_n128_n64: ; GFX11-GISEL: ; %bb.0: ; GFX11-GISEL-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) ; GFX11-GISEL-NEXT: v_cmp_eq_u32_e32 vcc_lo, 0, v0 -; GFX11-GISEL-NEXT: v_cndmask_b32_e64 v0, 0xc2800000, -1.0, vcc_lo -; GFX11-GISEL-NEXT: v_mul_f32_e32 v0, v1, v0 +; GFX11-GISEL-NEXT: v_cndmask_b32_e64 v0, 0, 1, vcc_lo +; GFX11-GISEL-NEXT: v_add_nc_u32_e32 v0, 6, v0 +; GFX11-GISEL-NEXT: v_ldexp_f32 v0, -v1, v0 ; GFX11-GISEL-NEXT: s_setpc_b64 s[30:31] %cond = icmp eq i32 %arg, 0 - %select.pow2 = select i1 %cond, float -1.0, float -64.0 + %select.pow2 = select i1 %cond, float -128.0, float -64.0 %mul = fmul float %x, %select.pow2 
ret float %mul } -define float @v_mul_f32_select_n64_n1(i32 %arg, float %x) { -; GFX9-SDAG-LABEL: v_mul_f32_select_n64_n1: +define float @v_mul_f32_select_n128_n16(i32 %arg, float %x) { +; GFX9-LABEL: v_mul_f32_select_n128_n16: +; GFX9: ; %bb.0: +; GFX9-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX9-NEXT: v_cmp_eq_u32_e32 vcc, 0, v0 +; GFX9-NEXT: v_cndmask_b32_e64 v0, 4, 7, vcc +; GFX9-NEXT: v_ldexp_f32 v0, -v1, v0 +; GFX9-NEXT: s_setpc_b64 s[30:31] +; +; GFX1011-LABEL: v_mul_f32_select_n128_n16: +; GFX1011: ; %bb.0: +; GFX1011-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX1011-NEXT: v_cmp_eq_u32_e32 vcc_lo, 0, v0 +; GFX1011-NEXT: v_cndmask_b32_e64 v0, 4, 7, vcc_lo +; GFX1011-NEXT: v_ldexp_f32 v0, -v1, v0 +; GFX1011-NEXT: s_setpc_b64 s[30:31] + %cond = icmp eq i32 %arg, 0 + %select.pow2 = select i1 %cond, float -128.0, float -16.0 + %mul = fmul float %x, %select.pow2 + ret float %mul +} + +define float @v_contract_mul_add_f32_select_64_1(i32 %arg, float %x, float %y) { +; GFX9-SDAG-LABEL: v_contract_mul_add_f32_select_64_1: ; GFX9-SDAG: ; %bb.0: ; GFX9-SDAG-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX9-SDAG-NEXT: v_mov_b32_e32 v3, 0x42800000 ; GFX9-SDAG-NEXT: v_cmp_eq_u32_e32 vcc, 0, v0 -; GFX9-SDAG-NEXT: v_cndmask_b32_e64 v0, 0, 6, vcc -; GFX9-SDAG-NEXT: v_ldexp_f32 v0, -v1, v0 +; GFX9-SDAG-NEXT: v_cndmask_b32_e32 v0, 1.0, v3, vcc +; GFX9-SDAG-NEXT: v_fma_f32 v0, v1, v0, v2 ; GFX9-SDAG-NEXT: s_setpc_b64 s[30:31] ; -; GFX9-GISEL-LABEL: v_mul_f32_select_n64_n1: +; GFX9-GISEL-LABEL: v_contract_mul_add_f32_select_64_1: ; GFX9-GISEL: ; %bb.0: ; GFX9-GISEL-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX9-GISEL-NEXT: v_mov_b32_e32 v2, 0xc2800000 ; GFX9-GISEL-NEXT: v_cmp_eq_u32_e32 vcc, 0, v0 -; GFX9-GISEL-NEXT: v_cndmask_b32_e32 v0, -1.0, v2, vcc -; GFX9-GISEL-NEXT: v_mul_f32_e32 v0, v1, v0 +; GFX9-GISEL-NEXT: v_cndmask_b32_e64 v0, 0, 6, vcc +; GFX9-GISEL-NEXT: v_ldexp_f32 v0, v1, v0 +; GFX9-GISEL-NEXT: v_add_f32_e32 v0, v0, v2 ; 
GFX9-GISEL-NEXT: s_setpc_b64 s[30:31] ; -; GFX10-SDAG-LABEL: v_mul_f32_select_n64_n1: +; GFX10-SDAG-LABEL: v_contract_mul_add_f32_select_64_1: ; GFX10-SDAG: ; %bb.0: ; GFX10-SDAG-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) ; GFX10-SDAG-NEXT: v_cmp_eq_u32_e32 vcc_lo, 0, v0 -; GFX10-SDAG-NEXT: v_cndmask_b32_e64 v0, 0, 6, vcc_lo -; GFX10-SDAG-NEXT: v_ldexp_f32 v0, -v1, v0 +; GFX10-SDAG-NEXT: v_cndmask_b32_e64 v0, 1.0, 0x42800000, vcc_lo +; GFX10-SDAG-NEXT: v_fma_f32 v0, v1, v0, v2 ; GFX10-SDAG-NEXT: s_setpc_b64 s[30:31] ; -; GFX10-GISEL-LABEL: v_mul_f32_select_n64_n1: +; GFX10-GISEL-LABEL: v_contract_mul_add_f32_select_64_1: ; GFX10-GISEL: ; %bb.0: ; GFX10-GISEL-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) ; GFX10-GISEL-NEXT: v_cmp_eq_u32_e32 vcc_lo, 0, v0 -; GFX10-GISEL-NEXT: v_cndmask_b32_e64 v0, -1.0, 0xc2800000, vcc_lo -; GFX10-GISEL-NEXT: v_mul_f32_e32 v0, v1, v0 +; GFX10-GISEL-NEXT: v_cndmask_b32_e64 v0, 0, 6, vcc_lo +; GFX10-GISEL-NEXT: v_ldexp_f32 v0, v1, v0 +; GFX10-GISEL-NEXT: v_add_f32_e32 v0, v0, v2 ; GFX10-GISEL-NEXT: s_setpc_b64 s[30:31] ; -; GFX11-SDAG-LABEL: v_mul_f32_select_n64_n1: +; GFX11-SDAG-LABEL: v_contract_mul_add_f32_select_64_1: ; GFX11-SDAG: ; %bb.0: ; GFX11-SDAG-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) ; GFX11-SDAG-NEXT: v_cmp_eq_u32_e32 vcc_lo, 0, v0 -; GFX11-SDAG-NEXT: v_cndmask_b32_e64 v0, 0, 6, vcc_lo -; GFX11-SDAG-NEXT: v_ldexp_f32 v0, -v1, v0 +; GFX11-SDAG-NEXT: v_cndmask_b32_e64 v0, 1.0, 0x42800000, vcc_lo +; GFX11-SDAG-NEXT: v_fma_f32 v0, v1, v0, v2 ; GFX11-SDAG-NEXT: s_setpc_b64 s[30:31] ; -; GFX11-GISEL-LABEL: v_mul_f32_select_n64_n1: +; GFX11-GISEL-LABEL: v_contract_mul_add_f32_select_64_1: ; GFX11-GISEL: ; %bb.0: ; GFX11-GISEL-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) ; GFX11-GISEL-NEXT: v_cmp_eq_u32_e32 vcc_lo, 0, v0 -; GFX11-GISEL-NEXT: v_cndmask_b32_e64 v0, -1.0, 0xc2800000, vcc_lo -; GFX11-GISEL-NEXT: v_mul_f32_e32 v0, v1, v0 +; GFX11-GISEL-NEXT: v_cndmask_b32_e64 v0, 0, 6, vcc_lo +; GFX11-GISEL-NEXT: v_ldexp_f32 
v0, v1, v0 +; GFX11-GISEL-NEXT: v_add_f32_e32 v0, v0, v2 ; GFX11-GISEL-NEXT: s_setpc_b64 s[30:31] %cond = icmp eq i32 %arg, 0 - %select.pow2 = select i1 %cond, float -64.0, float -1.0 - %mul = fmul float %x, %select.pow2 - ret float %mul + %select.pow2 = select contract i1 %cond, float 64.0, float 1.0 + %mul = fmul contract float %x, %select.pow2 + %fma = fadd contract float %mul, %y + ret float %fma } -define float @v_mul_f32_select_128_64(i32 %arg, float %x) { -; GFX9-SDAG-LABEL: v_mul_f32_select_128_64: +define float @v_contract_mul_add_f32_select_1_64(i32 %arg, float %x, float %y) { +; GFX9-SDAG-LABEL: v_contract_mul_add_f32_select_1_64: ; GFX9-SDAG: ; %bb.0: ; GFX9-SDAG-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX9-SDAG-NEXT: v_mov_b32_e32 v3, 0x42800000 ; GFX9-SDAG-NEXT: v_cmp_eq_u32_e32 vcc, 0, v0 -; GFX9-SDAG-NEXT: v_cndmask_b32_e64 v0, 6, 7, vcc -; GFX9-SDAG-NEXT: v_ldexp_f32 v0, v1, v0 +; GFX9-SDAG-NEXT: v_cndmask_b32_e64 v0, v3, 1.0, vcc +; GFX9-SDAG-NEXT: v_fma_f32 v0, v1, v0, v2 ; GFX9-SDAG-NEXT: s_setpc_b64 s[30:31] ; -; GFX9-GISEL-LABEL: v_mul_f32_select_128_64: +; GFX9-GISEL-LABEL: v_contract_mul_add_f32_select_1_64: ; GFX9-GISEL: ; %bb.0: ; GFX9-GISEL-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX9-GISEL-NEXT: v_mov_b32_e32 v2, 0x43000000 -; GFX9-GISEL-NEXT: v_mov_b32_e32 v3, 0x42800000 ; GFX9-GISEL-NEXT: v_cmp_eq_u32_e32 vcc, 0, v0 -; GFX9-GISEL-NEXT: v_cndmask_b32_e32 v0, v3, v2, vcc -; GFX9-GISEL-NEXT: v_mul_f32_e32 v0, v1, v0 +; GFX9-GISEL-NEXT: v_cndmask_b32_e64 v0, 6, 0, vcc +; GFX9-GISEL-NEXT: v_ldexp_f32 v0, v1, v0 +; GFX9-GISEL-NEXT: v_add_f32_e32 v0, v0, v2 ; GFX9-GISEL-NEXT: s_setpc_b64 s[30:31] ; -; GFX10-SDAG-LABEL: v_mul_f32_select_128_64: +; GFX10-SDAG-LABEL: v_contract_mul_add_f32_select_1_64: ; GFX10-SDAG: ; %bb.0: ; GFX10-SDAG-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) ; GFX10-SDAG-NEXT: v_cmp_eq_u32_e32 vcc_lo, 0, v0 -; GFX10-SDAG-NEXT: v_cndmask_b32_e64 v0, 6, 7, vcc_lo -; GFX10-SDAG-NEXT: v_ldexp_f32 v0, v1, v0 
+; GFX10-SDAG-NEXT: v_cndmask_b32_e64 v0, 0x42800000, 1.0, vcc_lo +; GFX10-SDAG-NEXT: v_fma_f32 v0, v1, v0, v2 ; GFX10-SDAG-NEXT: s_setpc_b64 s[30:31] ; -; GFX10-GISEL-LABEL: v_mul_f32_select_128_64: +; GFX10-GISEL-LABEL: v_contract_mul_add_f32_select_1_64: ; GFX10-GISEL: ; %bb.0: ; GFX10-GISEL-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX10-GISEL-NEXT: v_mov_b32_e32 v2, 0x42800000 ; GFX10-GISEL-NEXT: v_cmp_eq_u32_e32 vcc_lo, 0, v0 -; GFX10-GISEL-NEXT: v_cndmask_b32_e64 v0, v2, 0x43000000, vcc_lo -; GFX10-GISEL-NEXT: v_mul_f32_e32 v0, v1, v0 +; GFX10-GISEL-NEXT: v_cndmask_b32_e64 v0, 6, 0, vcc_lo +; GFX10-GISEL-NEXT: v_ldexp_f32 v0, v1, v0 +; GFX10-GISEL-NEXT: v_add_f32_e32 v0, v0, v2 ; GFX10-GISEL-NEXT: s_setpc_b64 s[30:31] ; -; GFX11-SDAG-LABEL: v_mul_f32_select_128_64: +; GFX11-SDAG-LABEL: v_contract_mul_add_f32_select_1_64: ; GFX11-SDAG: ; %bb.0: ; GFX11-SDAG-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) ; GFX11-SDAG-NEXT: v_cmp_eq_u32_e32 vcc_lo, 0, v0 -; GFX11-SDAG-NEXT: v_cndmask_b32_e64 v0, 6, 7, vcc_lo -; GFX11-SDAG-NEXT: v_ldexp_f32 v0, v1, v0 +; GFX11-SDAG-NEXT: v_cndmask_b32_e64 v0, 0x42800000, 1.0, vcc_lo +; GFX11-SDAG-NEXT: v_fma_f32 v0, v1, v0, v2 ; GFX11-SDAG-NEXT: s_setpc_b64 s[30:31] ; -; GFX11-GISEL-LABEL: v_mul_f32_select_128_64: +; GFX11-GISEL-LABEL: v_contract_mul_add_f32_select_1_64: ; GFX11-GISEL: ; %bb.0: ; GFX11-GISEL-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX11-GISEL-NEXT: v_mov_b32_e32 v2, 0x42800000 ; GFX11-GISEL-NEXT: v_cmp_eq_u32_e32 vcc_lo, 0, v0 -; GFX11-GISEL-NEXT: v_cndmask_b32_e64 v0, v2, 0x43000000, vcc_lo -; GFX11-GISEL-NEXT: v_mul_f32_e32 v0, v1, v0 +; GFX11-GISEL-NEXT: v_cndmask_b32_e64 v0, 6, 0, vcc_lo +; GFX11-GISEL-NEXT: v_ldexp_f32 v0, v1, v0 +; GFX11-GISEL-NEXT: v_add_f32_e32 v0, v0, v2 ; GFX11-GISEL-NEXT: s_setpc_b64 s[30:31] %cond = icmp eq i32 %arg, 0 - %select.pow2 = select i1 %cond, float 128.0, float 64.0 - %mul = fmul float %x, %select.pow2 - ret float %mul + %select.pow2 = select contract i1 
%cond, float 1.0, float 64.0 + %mul = fmul contract float %x, %select.pow2 + %fma = fadd contract float %mul, %y + ret float %fma } -define float @v_mul_f32_select_n128_n64(i32 %arg, float %x) { -; GFX9-SDAG-LABEL: v_mul_f32_select_n128_n64: +define float @v_contract_mul_add_f32_select_n64_n1(i32 %arg, float %x, float %y) { +; GFX9-SDAG-LABEL: v_contract_mul_add_f32_select_n64_n1: ; GFX9-SDAG: ; %bb.0: ; GFX9-SDAG-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX9-SDAG-NEXT: v_mov_b32_e32 v3, 0xc2800000 ; GFX9-SDAG-NEXT: v_cmp_eq_u32_e32 vcc, 0, v0 -; GFX9-SDAG-NEXT: v_cndmask_b32_e64 v0, 6, 7, vcc -; GFX9-SDAG-NEXT: v_ldexp_f32 v0, -v1, v0 +; GFX9-SDAG-NEXT: v_cndmask_b32_e32 v0, -1.0, v3, vcc +; GFX9-SDAG-NEXT: v_fma_f32 v0, v1, v0, v2 ; GFX9-SDAG-NEXT: s_setpc_b64 s[30:31] ; -; GFX9-GISEL-LABEL: v_mul_f32_select_n128_n64: +; GFX9-GISEL-LABEL: v_contract_mul_add_f32_select_n64_n1: ; GFX9-GISEL: ; %bb.0: ; GFX9-GISEL-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX9-GISEL-NEXT: v_mov_b32_e32 v2, 0xc3000000 -; GFX9-GISEL-NEXT: v_mov_b32_e32 v3, 0xc2800000 ; GFX9-GISEL-NEXT: v_cmp_eq_u32_e32 vcc, 0, v0 -; GFX9-GISEL-NEXT: v_cndmask_b32_e32 v0, v3, v2, vcc -; GFX9-GISEL-NEXT: v_mul_f32_e32 v0, v1, v0 +; GFX9-GISEL-NEXT: v_cndmask_b32_e64 v0, 0, 6, vcc +; GFX9-GISEL-NEXT: v_ldexp_f32 v0, -v1, v0 +; GFX9-GISEL-NEXT: v_add_f32_e32 v0, v0, v2 ; GFX9-GISEL-NEXT: s_setpc_b64 s[30:31] ; -; GFX10-SDAG-LABEL: v_mul_f32_select_n128_n64: +; GFX10-SDAG-LABEL: v_contract_mul_add_f32_select_n64_n1: ; GFX10-SDAG: ; %bb.0: ; GFX10-SDAG-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) ; GFX10-SDAG-NEXT: v_cmp_eq_u32_e32 vcc_lo, 0, v0 -; GFX10-SDAG-NEXT: v_cndmask_b32_e64 v0, 6, 7, vcc_lo -; GFX10-SDAG-NEXT: v_ldexp_f32 v0, -v1, v0 +; GFX10-SDAG-NEXT: v_cndmask_b32_e64 v0, -1.0, 0xc2800000, vcc_lo +; GFX10-SDAG-NEXT: v_fma_f32 v0, v1, v0, v2 ; GFX10-SDAG-NEXT: s_setpc_b64 s[30:31] ; -; GFX10-GISEL-LABEL: v_mul_f32_select_n128_n64: +; GFX10-GISEL-LABEL: 
v_contract_mul_add_f32_select_n64_n1: ; GFX10-GISEL: ; %bb.0: ; GFX10-GISEL-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX10-GISEL-NEXT: v_mov_b32_e32 v2, 0xc2800000 ; GFX10-GISEL-NEXT: v_cmp_eq_u32_e32 vcc_lo, 0, v0 -; GFX10-GISEL-NEXT: v_cndmask_b32_e64 v0, v2, 0xc3000000, vcc_lo -; GFX10-GISEL-NEXT: v_mul_f32_e32 v0, v1, v0 +; GFX10-GISEL-NEXT: v_cndmask_b32_e64 v0, 0, 6, vcc_lo +; GFX10-GISEL-NEXT: v_ldexp_f32 v0, -v1, v0 +; GFX10-GISEL-NEXT: v_add_f32_e32 v0, v0, v2 ; GFX10-GISEL-NEXT: s_setpc_b64 s[30:31] ; -; GFX11-SDAG-LABEL: v_mul_f32_select_n128_n64: +; GFX11-SDAG-LABEL: v_contract_mul_add_f32_select_n64_n1: ; GFX11-SDAG: ; %bb.0: ; GFX11-SDAG-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) ; GFX11-SDAG-NEXT: v_cmp_eq_u32_e32 vcc_lo, 0, v0 -; GFX11-SDAG-NEXT: v_cndmask_b32_e64 v0, 6, 7, vcc_lo -; GFX11-SDAG-NEXT: v_ldexp_f32 v0, -v1, v0 +; GFX11-SDAG-NEXT: v_cndmask_b32_e64 v0, -1.0, 0xc2800000, vcc_lo +; GFX11-SDAG-NEXT: v_fma_f32 v0, v1, v0, v2 ; GFX11-SDAG-NEXT: s_setpc_b64 s[30:31] ; -; GFX11-GISEL-LABEL: v_mul_f32_select_n128_n64: +; GFX11-GISEL-LABEL: v_contract_mul_add_f32_select_n64_n1: ; GFX11-GISEL: ; %bb.0: ; GFX11-GISEL-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX11-GISEL-NEXT: v_mov_b32_e32 v2, 0xc2800000 ; GFX11-GISEL-NEXT: v_cmp_eq_u32_e32 vcc_lo, 0, v0 -; GFX11-GISEL-NEXT: v_cndmask_b32_e64 v0, v2, 0xc3000000, vcc_lo -; GFX11-GISEL-NEXT: v_mul_f32_e32 v0, v1, v0 +; GFX11-GISEL-NEXT: v_cndmask_b32_e64 v0, 0, 6, vcc_lo +; GFX11-GISEL-NEXT: v_ldexp_f32 v0, -v1, v0 +; GFX11-GISEL-NEXT: v_add_f32_e32 v0, v0, v2 ; GFX11-GISEL-NEXT: s_setpc_b64 s[30:31] %cond = icmp eq i32 %arg, 0 - %select.pow2 = select i1 %cond, float -128.0, float -64.0 - %mul = fmul float %x, %select.pow2 - ret float %mul + %select.pow2 = select contract i1 %cond, float -64.0, float -1.0 + %mul = fmul contract float %x, %select.pow2 + %fma = fadd contract float %mul, %y + ret float %fma } -define float @v_mul_f32_select_n128_n16(i32 %arg, float %x) { -; 
GFX9-SDAG-LABEL: v_mul_f32_select_n128_n16: +define float @v_contract_mul_add_f32_select_n1_n64(i32 %arg, float %x, float %y) { +; GFX9-SDAG-LABEL: v_contract_mul_add_f32_select_n1_n64: ; GFX9-SDAG: ; %bb.0: ; GFX9-SDAG-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX9-SDAG-NEXT: v_mov_b32_e32 v3, 0xc2800000 ; GFX9-SDAG-NEXT: v_cmp_eq_u32_e32 vcc, 0, v0 -; GFX9-SDAG-NEXT: v_cndmask_b32_e64 v0, 4, 7, vcc -; GFX9-SDAG-NEXT: v_ldexp_f32 v0, -v1, v0 +; GFX9-SDAG-NEXT: v_cndmask_b32_e64 v0, v3, -1.0, vcc +; GFX9-SDAG-NEXT: v_fma_f32 v0, v1, v0, v2 ; GFX9-SDAG-NEXT: s_setpc_b64 s[30:31] ; -; GFX9-GISEL-LABEL: v_mul_f32_select_n128_n16: +; GFX9-GISEL-LABEL: v_contract_mul_add_f32_select_n1_n64: ; GFX9-GISEL: ; %bb.0: ; GFX9-GISEL-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX9-GISEL-NEXT: v_mov_b32_e32 v2, 0xc3000000 -; GFX9-GISEL-NEXT: v_mov_b32_e32 v3, 0xc1800000 ; GFX9-GISEL-NEXT: v_cmp_eq_u32_e32 vcc, 0, v0 -; GFX9-GISEL-NEXT: v_cndmask_b32_e32 v0, v3, v2, vcc -; GFX9-GISEL-NEXT: v_mul_f32_e32 v0, v1, v0 +; GFX9-GISEL-NEXT: v_cndmask_b32_e64 v0, 6, 0, vcc +; GFX9-GISEL-NEXT: v_ldexp_f32 v0, -v1, v0 +; GFX9-GISEL-NEXT: v_add_f32_e32 v0, v0, v2 ; GFX9-GISEL-NEXT: s_setpc_b64 s[30:31] ; -; GFX10-SDAG-LABEL: v_mul_f32_select_n128_n16: +; GFX10-SDAG-LABEL: v_contract_mul_add_f32_select_n1_n64: ; GFX10-SDAG: ; %bb.0: ; GFX10-SDAG-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) ; GFX10-SDAG-NEXT: v_cmp_eq_u32_e32 vcc_lo, 0, v0 -; GFX10-SDAG-NEXT: v_cndmask_b32_e64 v0, 4, 7, vcc_lo -; GFX10-SDAG-NEXT: v_ldexp_f32 v0, -v1, v0 +; GFX10-SDAG-NEXT: v_cndmask_b32_e64 v0, 0xc2800000, -1.0, vcc_lo +; GFX10-SDAG-NEXT: v_fma_f32 v0, v1, v0, v2 ; GFX10-SDAG-NEXT: s_setpc_b64 s[30:31] ; -; GFX10-GISEL-LABEL: v_mul_f32_select_n128_n16: +; GFX10-GISEL-LABEL: v_contract_mul_add_f32_select_n1_n64: ; GFX10-GISEL: ; %bb.0: ; GFX10-GISEL-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX10-GISEL-NEXT: v_mov_b32_e32 v2, 0xc1800000 ; GFX10-GISEL-NEXT: v_cmp_eq_u32_e32 vcc_lo, 0, v0 -; 
GFX10-GISEL-NEXT: v_cndmask_b32_e64 v0, v2, 0xc3000000, vcc_lo -; GFX10-GISEL-NEXT: v_mul_f32_e32 v0, v1, v0 +; GFX10-GISEL-NEXT: v_cndmask_b32_e64 v0, 6, 0, vcc_lo +; GFX10-GISEL-NEXT: v_ldexp_f32 v0, -v1, v0 +; GFX10-GISEL-NEXT: v_add_f32_e32 v0, v0, v2 ; GFX10-GISEL-NEXT: s_setpc_b64 s[30:31] ; -; GFX11-SDAG-LABEL: v_mul_f32_select_n128_n16: +; GFX11-SDAG-LABEL: v_contract_mul_add_f32_select_n1_n64: ; GFX11-SDAG: ; %bb.0: ; GFX11-SDAG-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) ; GFX11-SDAG-NEXT: v_cmp_eq_u32_e32 vcc_lo, 0, v0 -; GFX11-SDAG-NEXT: v_cndmask_b32_e64 v0, 4, 7, vcc_lo -; GFX11-SDAG-NEXT: v_ldexp_f32 v0, -v1, v0 +; GFX11-SDAG-NEXT: v_cndmask_b32_e64 v0, 0xc2800000, -1.0, vcc_lo +; GFX11-SDAG-NEXT: v_fma_f32 v0, v1, v0, v2 ; GFX11-SDAG-NEXT: s_setpc_b64 s[30:31] ; -; GFX11-GISEL-LABEL: v_mul_f32_select_n128_n16: +; GFX11-GISEL-LABEL: v_contract_mul_add_f32_select_n1_n64: ; GFX11-GISEL: ; %bb.0: ; GFX11-GISEL-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX11-GISEL-NEXT: v_mov_b32_e32 v2, 0xc1800000 ; GFX11-GISEL-NEXT: v_cmp_eq_u32_e32 vcc_lo, 0, v0 -; GFX11-GISEL-NEXT: v_cndmask_b32_e64 v0, v2, 0xc3000000, vcc_lo -; GFX11-GISEL-NEXT: v_mul_f32_e32 v0, v1, v0 +; GFX11-GISEL-NEXT: v_cndmask_b32_e64 v0, 6, 0, vcc_lo +; GFX11-GISEL-NEXT: v_ldexp_f32 v0, -v1, v0 +; GFX11-GISEL-NEXT: v_add_f32_e32 v0, v0, v2 ; GFX11-GISEL-NEXT: s_setpc_b64 s[30:31] - %cond = icmp eq i32 %arg, 0 - %select.pow2 = select i1 %cond, float -128.0, float -16.0 - %mul = fmul float %x, %select.pow2 - ret float %mul -} - -define float @v_contract_mul_add_f32_select_64_1(i32 %arg, float %x, float %y) { -; GFX9-LABEL: v_contract_mul_add_f32_select_64_1: -; GFX9: ; %bb.0: -; GFX9-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX9-NEXT: v_mov_b32_e32 v3, 0x42800000 -; GFX9-NEXT: v_cmp_eq_u32_e32 vcc, 0, v0 -; GFX9-NEXT: v_cndmask_b32_e32 v0, 1.0, v3, vcc -; GFX9-NEXT: v_fma_f32 v0, v1, v0, v2 -; GFX9-NEXT: s_setpc_b64 s[30:31] -; -; GFX1011-LABEL: 
v_contract_mul_add_f32_select_64_1: -; GFX1011: ; %bb.0: -; GFX1011-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX1011-NEXT: v_cmp_eq_u32_e32 vcc_lo, 0, v0 -; GFX1011-NEXT: v_cndmask_b32_e64 v0, 1.0, 0x42800000, vcc_lo -; GFX1011-NEXT: v_fma_f32 v0, v1, v0, v2 -; GFX1011-NEXT: s_setpc_b64 s[30:31] - %cond = icmp eq i32 %arg, 0 - %select.pow2 = select contract i1 %cond, float 64.0, float 1.0 - %mul = fmul contract float %x, %select.pow2 - %fma = fadd contract float %mul, %y - ret float %fma -} - -define float @v_contract_mul_add_f32_select_1_64(i32 %arg, float %x, float %y) { -; GFX9-LABEL: v_contract_mul_add_f32_select_1_64: -; GFX9: ; %bb.0: -; GFX9-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX9-NEXT: v_mov_b32_e32 v3, 0x42800000 -; GFX9-NEXT: v_cmp_eq_u32_e32 vcc, 0, v0 -; GFX9-NEXT: v_cndmask_b32_e64 v0, v3, 1.0, vcc -; GFX9-NEXT: v_fma_f32 v0, v1, v0, v2 -; GFX9-NEXT: s_setpc_b64 s[30:31] -; -; GFX1011-LABEL: v_contract_mul_add_f32_select_1_64: -; GFX1011: ; %bb.0: -; GFX1011-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX1011-NEXT: v_cmp_eq_u32_e32 vcc_lo, 0, v0 -; GFX1011-NEXT: v_cndmask_b32_e64 v0, 0x42800000, 1.0, vcc_lo -; GFX1011-NEXT: v_fma_f32 v0, v1, v0, v2 -; GFX1011-NEXT: s_setpc_b64 s[30:31] - %cond = icmp eq i32 %arg, 0 - %select.pow2 = select contract i1 %cond, float 1.0, float 64.0 - %mul = fmul contract float %x, %select.pow2 - %fma = fadd contract float %mul, %y - ret float %fma -} - -define float @v_contract_mul_add_f32_select_n64_n1(i32 %arg, float %x, float %y) { -; GFX9-LABEL: v_contract_mul_add_f32_select_n64_n1: -; GFX9: ; %bb.0: -; GFX9-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX9-NEXT: v_mov_b32_e32 v3, 0xc2800000 -; GFX9-NEXT: v_cmp_eq_u32_e32 vcc, 0, v0 -; GFX9-NEXT: v_cndmask_b32_e32 v0, -1.0, v3, vcc -; GFX9-NEXT: v_fma_f32 v0, v1, v0, v2 -; GFX9-NEXT: s_setpc_b64 s[30:31] -; -; GFX1011-LABEL: v_contract_mul_add_f32_select_n64_n1: -; GFX1011: ; %bb.0: -; GFX1011-NEXT: s_waitcnt vmcnt(0) expcnt(0) 
lgkmcnt(0) -; GFX1011-NEXT: v_cmp_eq_u32_e32 vcc_lo, 0, v0 -; GFX1011-NEXT: v_cndmask_b32_e64 v0, -1.0, 0xc2800000, vcc_lo -; GFX1011-NEXT: v_fma_f32 v0, v1, v0, v2 -; GFX1011-NEXT: s_setpc_b64 s[30:31] - %cond = icmp eq i32 %arg, 0 - %select.pow2 = select contract i1 %cond, float -64.0, float -1.0 - %mul = fmul contract float %x, %select.pow2 - %fma = fadd contract float %mul, %y - ret float %fma -} - -define float @v_contract_mul_add_f32_select_n1_n64(i32 %arg, float %x, float %y) { -; GFX9-LABEL: v_contract_mul_add_f32_select_n1_n64: -; GFX9: ; %bb.0: -; GFX9-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX9-NEXT: v_mov_b32_e32 v3, 0xc2800000 -; GFX9-NEXT: v_cmp_eq_u32_e32 vcc, 0, v0 -; GFX9-NEXT: v_cndmask_b32_e64 v0, v3, -1.0, vcc -; GFX9-NEXT: v_fma_f32 v0, v1, v0, v2 -; GFX9-NEXT: s_setpc_b64 s[30:31] -; -; GFX1011-LABEL: v_contract_mul_add_f32_select_n1_n64: -; GFX1011: ; %bb.0: -; GFX1011-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX1011-NEXT: v_cmp_eq_u32_e32 vcc_lo, 0, v0 -; GFX1011-NEXT: v_cndmask_b32_e64 v0, 0xc2800000, -1.0, vcc_lo -; GFX1011-NEXT: v_fma_f32 v0, v1, v0, v2 -; GFX1011-NEXT: s_setpc_b64 s[30:31] %cond = icmp eq i32 %arg, 0 %select.pow2 = select contract i1 %cond, float -1.0, float -64.0 %mul = fmul contract float %x, %select.pow2 @@ -3810,11 +3780,11 @@ define float @v_contract_mul_add_f32_select_128_64(i32 %arg, float %x, float %y) ; GFX9-GISEL-LABEL: v_contract_mul_add_f32_select_128_64: ; GFX9-GISEL: ; %bb.0: ; GFX9-GISEL-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX9-GISEL-NEXT: v_mov_b32_e32 v3, 0x43000000 -; GFX9-GISEL-NEXT: v_mov_b32_e32 v4, 0x42800000 ; GFX9-GISEL-NEXT: v_cmp_eq_u32_e32 vcc, 0, v0 -; GFX9-GISEL-NEXT: v_cndmask_b32_e32 v0, v4, v3, vcc -; GFX9-GISEL-NEXT: v_fma_f32 v0, v1, v0, v2 +; GFX9-GISEL-NEXT: v_cndmask_b32_e64 v0, 0, 1, vcc +; GFX9-GISEL-NEXT: v_add_u32_e32 v0, 6, v0 +; GFX9-GISEL-NEXT: v_ldexp_f32 v0, v1, v0 +; GFX9-GISEL-NEXT: v_add_f32_e32 v0, v0, v2 ; GFX9-GISEL-NEXT: s_setpc_b64 
s[30:31] ; ; GFX10-SDAG-LABEL: v_contract_mul_add_f32_select_128_64: @@ -3829,10 +3799,11 @@ define float @v_contract_mul_add_f32_select_128_64(i32 %arg, float %x, float %y) ; GFX10-GISEL-LABEL: v_contract_mul_add_f32_select_128_64: ; GFX10-GISEL: ; %bb.0: ; GFX10-GISEL-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX10-GISEL-NEXT: v_mov_b32_e32 v3, 0x42800000 ; GFX10-GISEL-NEXT: v_cmp_eq_u32_e32 vcc_lo, 0, v0 -; GFX10-GISEL-NEXT: v_cndmask_b32_e64 v0, v3, 0x43000000, vcc_lo -; GFX10-GISEL-NEXT: v_fma_f32 v0, v1, v0, v2 +; GFX10-GISEL-NEXT: v_cndmask_b32_e64 v0, 0, 1, vcc_lo +; GFX10-GISEL-NEXT: v_add_nc_u32_e32 v0, 6, v0 +; GFX10-GISEL-NEXT: v_ldexp_f32 v0, v1, v0 +; GFX10-GISEL-NEXT: v_add_f32_e32 v0, v0, v2 ; GFX10-GISEL-NEXT: s_setpc_b64 s[30:31] ; ; GFX11-SDAG-LABEL: v_contract_mul_add_f32_select_128_64: @@ -3847,10 +3818,11 @@ define float @v_contract_mul_add_f32_select_128_64(i32 %arg, float %x, float %y) ; GFX11-GISEL-LABEL: v_contract_mul_add_f32_select_128_64: ; GFX11-GISEL: ; %bb.0: ; GFX11-GISEL-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX11-GISEL-NEXT: v_mov_b32_e32 v3, 0x42800000 ; GFX11-GISEL-NEXT: v_cmp_eq_u32_e32 vcc_lo, 0, v0 -; GFX11-GISEL-NEXT: v_cndmask_b32_e64 v0, v3, 0x43000000, vcc_lo -; GFX11-GISEL-NEXT: v_fma_f32 v0, v1, v0, v2 +; GFX11-GISEL-NEXT: v_cndmask_b32_e64 v0, 0, 1, vcc_lo +; GFX11-GISEL-NEXT: v_add_nc_u32_e32 v0, 6, v0 +; GFX11-GISEL-NEXT: v_ldexp_f32 v0, v1, v0 +; GFX11-GISEL-NEXT: v_add_f32_e32 v0, v0, v2 ; GFX11-GISEL-NEXT: s_setpc_b64 s[30:31] %cond = icmp eq i32 %arg, 0 %select.pow2 = select i1 %cond, float 128.0, float 64.0 @@ -3860,22 +3832,57 @@ define float @v_contract_mul_add_f32_select_128_64(i32 %arg, float %x, float %y) } define float @v_contract_mul_add_f32_select_128_4(i32 %arg, float %x, float %y) { -; GFX9-LABEL: v_contract_mul_add_f32_select_128_4: -; GFX9: ; %bb.0: -; GFX9-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX9-NEXT: v_mov_b32_e32 v3, 0x43000000 -; GFX9-NEXT: v_cmp_eq_u32_e32 vcc, 0, 
v0 -; GFX9-NEXT: v_cndmask_b32_e32 v0, 4.0, v3, vcc -; GFX9-NEXT: v_fma_f32 v0, v1, v0, v2 -; GFX9-NEXT: s_setpc_b64 s[30:31] +; GFX9-SDAG-LABEL: v_contract_mul_add_f32_select_128_4: +; GFX9-SDAG: ; %bb.0: +; GFX9-SDAG-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX9-SDAG-NEXT: v_mov_b32_e32 v3, 0x43000000 +; GFX9-SDAG-NEXT: v_cmp_eq_u32_e32 vcc, 0, v0 +; GFX9-SDAG-NEXT: v_cndmask_b32_e32 v0, 4.0, v3, vcc +; GFX9-SDAG-NEXT: v_fma_f32 v0, v1, v0, v2 +; GFX9-SDAG-NEXT: s_setpc_b64 s[30:31] ; -; GFX1011-LABEL: v_contract_mul_add_f32_select_128_4: -; GFX1011: ; %bb.0: -; GFX1011-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX1011-NEXT: v_cmp_eq_u32_e32 vcc_lo, 0, v0 -; GFX1011-NEXT: v_cndmask_b32_e64 v0, 4.0, 0x43000000, vcc_lo -; GFX1011-NEXT: v_fma_f32 v0, v1, v0, v2 -; GFX1011-NEXT: s_setpc_b64 s[30:31] +; GFX9-GISEL-LABEL: v_contract_mul_add_f32_select_128_4: +; GFX9-GISEL: ; %bb.0: +; GFX9-GISEL-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX9-GISEL-NEXT: v_cmp_eq_u32_e32 vcc, 0, v0 +; GFX9-GISEL-NEXT: v_cndmask_b32_e64 v0, 2, 7, vcc +; GFX9-GISEL-NEXT: v_ldexp_f32 v0, v1, v0 +; GFX9-GISEL-NEXT: v_add_f32_e32 v0, v0, v2 +; GFX9-GISEL-NEXT: s_setpc_b64 s[30:31] +; +; GFX10-SDAG-LABEL: v_contract_mul_add_f32_select_128_4: +; GFX10-SDAG: ; %bb.0: +; GFX10-SDAG-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX10-SDAG-NEXT: v_cmp_eq_u32_e32 vcc_lo, 0, v0 +; GFX10-SDAG-NEXT: v_cndmask_b32_e64 v0, 4.0, 0x43000000, vcc_lo +; GFX10-SDAG-NEXT: v_fma_f32 v0, v1, v0, v2 +; GFX10-SDAG-NEXT: s_setpc_b64 s[30:31] +; +; GFX10-GISEL-LABEL: v_contract_mul_add_f32_select_128_4: +; GFX10-GISEL: ; %bb.0: +; GFX10-GISEL-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX10-GISEL-NEXT: v_cmp_eq_u32_e32 vcc_lo, 0, v0 +; GFX10-GISEL-NEXT: v_cndmask_b32_e64 v0, 2, 7, vcc_lo +; GFX10-GISEL-NEXT: v_ldexp_f32 v0, v1, v0 +; GFX10-GISEL-NEXT: v_add_f32_e32 v0, v0, v2 +; GFX10-GISEL-NEXT: s_setpc_b64 s[30:31] +; +; GFX11-SDAG-LABEL: v_contract_mul_add_f32_select_128_4: +; 
GFX11-SDAG: ; %bb.0: +; GFX11-SDAG-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX11-SDAG-NEXT: v_cmp_eq_u32_e32 vcc_lo, 0, v0 +; GFX11-SDAG-NEXT: v_cndmask_b32_e64 v0, 4.0, 0x43000000, vcc_lo +; GFX11-SDAG-NEXT: v_fma_f32 v0, v1, v0, v2 +; GFX11-SDAG-NEXT: s_setpc_b64 s[30:31] +; +; GFX11-GISEL-LABEL: v_contract_mul_add_f32_select_128_4: +; GFX11-GISEL: ; %bb.0: +; GFX11-GISEL-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX11-GISEL-NEXT: v_cmp_eq_u32_e32 vcc_lo, 0, v0 +; GFX11-GISEL-NEXT: v_cndmask_b32_e64 v0, 2, 7, vcc_lo +; GFX11-GISEL-NEXT: v_ldexp_f32 v0, v1, v0 +; GFX11-GISEL-NEXT: v_add_f32_e32 v0, v0, v2 +; GFX11-GISEL-NEXT: s_setpc_b64 s[30:31] %cond = icmp eq i32 %arg, 0 %select.pow2 = select i1 %cond, float 128.0, float 4.0 %mul = fmul contract float %x, %select.pow2 @@ -3907,203 +3914,124 @@ define float @v_contract_mul_add_f32_select_2_4(i32 %arg, float %x, float %y) { } define float @v_contract_mul_add_f32_select_4_128(i32 %arg, float %x, float %y) { -; GFX9-LABEL: v_contract_mul_add_f32_select_4_128: -; GFX9: ; %bb.0: -; GFX9-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX9-NEXT: v_mov_b32_e32 v3, 0x43000000 -; GFX9-NEXT: v_cmp_eq_u32_e32 vcc, 0, v0 -; GFX9-NEXT: v_cndmask_b32_e64 v0, v3, 4.0, vcc -; GFX9-NEXT: v_fma_f32 v0, v1, v0, v2 -; GFX9-NEXT: s_setpc_b64 s[30:31] -; -; GFX1011-LABEL: v_contract_mul_add_f32_select_4_128: -; GFX1011: ; %bb.0: -; GFX1011-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX1011-NEXT: v_cmp_eq_u32_e32 vcc_lo, 0, v0 -; GFX1011-NEXT: v_cndmask_b32_e64 v0, 0x43000000, 4.0, vcc_lo -; GFX1011-NEXT: v_fma_f32 v0, v1, v0, v2 -; GFX1011-NEXT: s_setpc_b64 s[30:31] - %cond = icmp eq i32 %arg, 0 - %select.pow2 = select i1 %cond, float 4.0, float 128.0 - %mul = fmul contract float %x, %select.pow2 - %fma = fadd contract float %mul, %y - ret float %fma -} - -define double @v_mul_f64_select_64_1(i32 %arg, double %x) { -; GFX9-SDAG-LABEL: v_mul_f64_select_64_1: +; GFX9-SDAG-LABEL: 
v_contract_mul_add_f32_select_4_128: ; GFX9-SDAG: ; %bb.0: ; GFX9-SDAG-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX9-SDAG-NEXT: v_mov_b32_e32 v3, 0x43000000 ; GFX9-SDAG-NEXT: v_cmp_eq_u32_e32 vcc, 0, v0 -; GFX9-SDAG-NEXT: v_cndmask_b32_e64 v0, 0, 6, vcc -; GFX9-SDAG-NEXT: v_ldexp_f64 v[0:1], v[1:2], v0 +; GFX9-SDAG-NEXT: v_cndmask_b32_e64 v0, v3, 4.0, vcc +; GFX9-SDAG-NEXT: v_fma_f32 v0, v1, v0, v2 ; GFX9-SDAG-NEXT: s_setpc_b64 s[30:31] ; -; GFX9-GISEL-LABEL: v_mul_f64_select_64_1: +; GFX9-GISEL-LABEL: v_contract_mul_add_f32_select_4_128: ; GFX9-GISEL: ; %bb.0: ; GFX9-GISEL-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX9-GISEL-NEXT: v_mov_b32_e32 v4, 0x40500000 -; GFX9-GISEL-NEXT: v_mov_b32_e32 v5, 0x3ff00000 ; GFX9-GISEL-NEXT: v_cmp_eq_u32_e32 vcc, 0, v0 -; GFX9-GISEL-NEXT: v_mov_b32_e32 v3, 0 -; GFX9-GISEL-NEXT: v_cndmask_b32_e32 v4, v5, v4, vcc -; GFX9-GISEL-NEXT: v_mul_f64 v[0:1], v[1:2], v[3:4] +; GFX9-GISEL-NEXT: v_cndmask_b32_e64 v0, 7, 2, vcc +; GFX9-GISEL-NEXT: v_ldexp_f32 v0, v1, v0 +; GFX9-GISEL-NEXT: v_add_f32_e32 v0, v0, v2 ; GFX9-GISEL-NEXT: s_setpc_b64 s[30:31] ; -; GFX10-SDAG-LABEL: v_mul_f64_select_64_1: +; GFX10-SDAG-LABEL: v_contract_mul_add_f32_select_4_128: ; GFX10-SDAG: ; %bb.0: ; GFX10-SDAG-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) ; GFX10-SDAG-NEXT: v_cmp_eq_u32_e32 vcc_lo, 0, v0 -; GFX10-SDAG-NEXT: v_cndmask_b32_e64 v0, 0, 6, vcc_lo -; GFX10-SDAG-NEXT: v_ldexp_f64 v[0:1], v[1:2], v0 +; GFX10-SDAG-NEXT: v_cndmask_b32_e64 v0, 0x43000000, 4.0, vcc_lo +; GFX10-SDAG-NEXT: v_fma_f32 v0, v1, v0, v2 ; GFX10-SDAG-NEXT: s_setpc_b64 s[30:31] ; -; GFX10-GISEL-LABEL: v_mul_f64_select_64_1: +; GFX10-GISEL-LABEL: v_contract_mul_add_f32_select_4_128: ; GFX10-GISEL: ; %bb.0: ; GFX10-GISEL-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX10-GISEL-NEXT: v_mov_b32_e32 v4, 0x3ff00000 ; GFX10-GISEL-NEXT: v_cmp_eq_u32_e32 vcc_lo, 0, v0 -; GFX10-GISEL-NEXT: v_mov_b32_e32 v3, 0 -; GFX10-GISEL-NEXT: v_cndmask_b32_e64 v4, v4, 0x40500000, vcc_lo 
-; GFX10-GISEL-NEXT: v_mul_f64 v[0:1], v[1:2], v[3:4] +; GFX10-GISEL-NEXT: v_cndmask_b32_e64 v0, 7, 2, vcc_lo +; GFX10-GISEL-NEXT: v_ldexp_f32 v0, v1, v0 +; GFX10-GISEL-NEXT: v_add_f32_e32 v0, v0, v2 ; GFX10-GISEL-NEXT: s_setpc_b64 s[30:31] ; -; GFX11-SDAG-LABEL: v_mul_f64_select_64_1: +; GFX11-SDAG-LABEL: v_contract_mul_add_f32_select_4_128: ; GFX11-SDAG: ; %bb.0: ; GFX11-SDAG-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) ; GFX11-SDAG-NEXT: v_cmp_eq_u32_e32 vcc_lo, 0, v0 -; GFX11-SDAG-NEXT: v_cndmask_b32_e64 v0, 0, 6, vcc_lo -; GFX11-SDAG-NEXT: v_ldexp_f64 v[0:1], v[1:2], v0 +; GFX11-SDAG-NEXT: v_cndmask_b32_e64 v0, 0x43000000, 4.0, vcc_lo +; GFX11-SDAG-NEXT: v_fma_f32 v0, v1, v0, v2 ; GFX11-SDAG-NEXT: s_setpc_b64 s[30:31] ; -; GFX11-GISEL-LABEL: v_mul_f64_select_64_1: +; GFX11-GISEL-LABEL: v_contract_mul_add_f32_select_4_128: ; GFX11-GISEL: ; %bb.0: ; GFX11-GISEL-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX11-GISEL-NEXT: v_dual_mov_b32 v4, 0x3ff00000 :: v_dual_mov_b32 v3, 0 ; GFX11-GISEL-NEXT: v_cmp_eq_u32_e32 vcc_lo, 0, v0 -; GFX11-GISEL-NEXT: v_cndmask_b32_e64 v4, v4, 0x40500000, vcc_lo -; GFX11-GISEL-NEXT: v_mul_f64 v[0:1], v[1:2], v[3:4] +; GFX11-GISEL-NEXT: v_cndmask_b32_e64 v0, 7, 2, vcc_lo +; GFX11-GISEL-NEXT: v_ldexp_f32 v0, v1, v0 +; GFX11-GISEL-NEXT: v_add_f32_e32 v0, v0, v2 ; GFX11-GISEL-NEXT: s_setpc_b64 s[30:31] %cond = icmp eq i32 %arg, 0 - %select.pow2 = select i1 %cond, double 64.0, double 1.0 - %mul = fmul double %x, %select.pow2 - ret double %mul + %select.pow2 = select i1 %cond, float 4.0, float 128.0 + %mul = fmul contract float %x, %select.pow2 + %fma = fadd contract float %mul, %y + ret float %fma } -define double @v_mul_f64_select_1_64(i32 %arg, double %x) { -; GFX9-SDAG-LABEL: v_mul_f64_select_1_64: -; GFX9-SDAG: ; %bb.0: -; GFX9-SDAG-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX9-SDAG-NEXT: v_cmp_eq_u32_e32 vcc, 0, v0 -; GFX9-SDAG-NEXT: v_cndmask_b32_e64 v0, 6, 0, vcc -; GFX9-SDAG-NEXT: v_ldexp_f64 v[0:1], v[1:2], v0 -; 
GFX9-SDAG-NEXT: s_setpc_b64 s[30:31] -; -; GFX9-GISEL-LABEL: v_mul_f64_select_1_64: -; GFX9-GISEL: ; %bb.0: -; GFX9-GISEL-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX9-GISEL-NEXT: v_mov_b32_e32 v4, 0x3ff00000 -; GFX9-GISEL-NEXT: v_mov_b32_e32 v5, 0x40500000 -; GFX9-GISEL-NEXT: v_cmp_eq_u32_e32 vcc, 0, v0 -; GFX9-GISEL-NEXT: v_mov_b32_e32 v3, 0 -; GFX9-GISEL-NEXT: v_cndmask_b32_e32 v4, v5, v4, vcc -; GFX9-GISEL-NEXT: v_mul_f64 v[0:1], v[1:2], v[3:4] -; GFX9-GISEL-NEXT: s_setpc_b64 s[30:31] -; -; GFX10-SDAG-LABEL: v_mul_f64_select_1_64: -; GFX10-SDAG: ; %bb.0: -; GFX10-SDAG-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX10-SDAG-NEXT: v_cmp_eq_u32_e32 vcc_lo, 0, v0 -; GFX10-SDAG-NEXT: v_cndmask_b32_e64 v0, 6, 0, vcc_lo -; GFX10-SDAG-NEXT: v_ldexp_f64 v[0:1], v[1:2], v0 -; GFX10-SDAG-NEXT: s_setpc_b64 s[30:31] -; -; GFX10-GISEL-LABEL: v_mul_f64_select_1_64: -; GFX10-GISEL: ; %bb.0: -; GFX10-GISEL-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX10-GISEL-NEXT: v_mov_b32_e32 v4, 0x40500000 -; GFX10-GISEL-NEXT: v_cmp_eq_u32_e32 vcc_lo, 0, v0 -; GFX10-GISEL-NEXT: v_mov_b32_e32 v3, 0 -; GFX10-GISEL-NEXT: v_cndmask_b32_e64 v4, v4, 0x3ff00000, vcc_lo -; GFX10-GISEL-NEXT: v_mul_f64 v[0:1], v[1:2], v[3:4] -; GFX10-GISEL-NEXT: s_setpc_b64 s[30:31] -; -; GFX11-SDAG-LABEL: v_mul_f64_select_1_64: -; GFX11-SDAG: ; %bb.0: -; GFX11-SDAG-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX11-SDAG-NEXT: v_cmp_eq_u32_e32 vcc_lo, 0, v0 -; GFX11-SDAG-NEXT: v_cndmask_b32_e64 v0, 6, 0, vcc_lo -; GFX11-SDAG-NEXT: v_ldexp_f64 v[0:1], v[1:2], v0 -; GFX11-SDAG-NEXT: s_setpc_b64 s[30:31] +define double @v_mul_f64_select_64_1(i32 %arg, double %x) { +; GFX9-LABEL: v_mul_f64_select_64_1: +; GFX9: ; %bb.0: +; GFX9-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX9-NEXT: v_cmp_eq_u32_e32 vcc, 0, v0 +; GFX9-NEXT: v_cndmask_b32_e64 v0, 0, 6, vcc +; GFX9-NEXT: v_ldexp_f64 v[0:1], v[1:2], v0 +; GFX9-NEXT: s_setpc_b64 s[30:31] ; -; GFX11-GISEL-LABEL: v_mul_f64_select_1_64: -; 
GFX11-GISEL: ; %bb.0: -; GFX11-GISEL-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX11-GISEL-NEXT: v_dual_mov_b32 v4, 0x40500000 :: v_dual_mov_b32 v3, 0 -; GFX11-GISEL-NEXT: v_cmp_eq_u32_e32 vcc_lo, 0, v0 -; GFX11-GISEL-NEXT: v_cndmask_b32_e64 v4, v4, 0x3ff00000, vcc_lo -; GFX11-GISEL-NEXT: v_mul_f64 v[0:1], v[1:2], v[3:4] -; GFX11-GISEL-NEXT: s_setpc_b64 s[30:31] +; GFX1011-LABEL: v_mul_f64_select_64_1: +; GFX1011: ; %bb.0: +; GFX1011-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX1011-NEXT: v_cmp_eq_u32_e32 vcc_lo, 0, v0 +; GFX1011-NEXT: v_cndmask_b32_e64 v0, 0, 6, vcc_lo +; GFX1011-NEXT: v_ldexp_f64 v[0:1], v[1:2], v0 +; GFX1011-NEXT: s_setpc_b64 s[30:31] %cond = icmp eq i32 %arg, 0 - %select.pow2 = select i1 %cond, double 1.0, double 64.0 + %select.pow2 = select i1 %cond, double 64.0, double 1.0 %mul = fmul double %x, %select.pow2 ret double %mul -} - -define double @v_mul_f64_select_n1_n64(i32 %arg, double %x) { -; GFX9-SDAG-LABEL: v_mul_f64_select_n1_n64: -; GFX9-SDAG: ; %bb.0: -; GFX9-SDAG-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX9-SDAG-NEXT: v_cmp_eq_u32_e32 vcc, 0, v0 -; GFX9-SDAG-NEXT: v_cndmask_b32_e64 v0, 6, 0, vcc -; GFX9-SDAG-NEXT: v_ldexp_f64 v[0:1], -v[1:2], v0 -; GFX9-SDAG-NEXT: s_setpc_b64 s[30:31] -; -; GFX9-GISEL-LABEL: v_mul_f64_select_n1_n64: -; GFX9-GISEL: ; %bb.0: -; GFX9-GISEL-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX9-GISEL-NEXT: v_mov_b32_e32 v4, 0xbff00000 -; GFX9-GISEL-NEXT: v_mov_b32_e32 v5, 0xc0500000 -; GFX9-GISEL-NEXT: v_cmp_eq_u32_e32 vcc, 0, v0 -; GFX9-GISEL-NEXT: v_mov_b32_e32 v3, 0 -; GFX9-GISEL-NEXT: v_cndmask_b32_e32 v4, v5, v4, vcc -; GFX9-GISEL-NEXT: v_mul_f64 v[0:1], v[1:2], v[3:4] -; GFX9-GISEL-NEXT: s_setpc_b64 s[30:31] -; -; GFX10-SDAG-LABEL: v_mul_f64_select_n1_n64: -; GFX10-SDAG: ; %bb.0: -; GFX10-SDAG-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX10-SDAG-NEXT: v_cmp_eq_u32_e32 vcc_lo, 0, v0 -; GFX10-SDAG-NEXT: v_cndmask_b32_e64 v0, 6, 0, vcc_lo -; GFX10-SDAG-NEXT: v_ldexp_f64 
v[0:1], -v[1:2], v0 -; GFX10-SDAG-NEXT: s_setpc_b64 s[30:31] -; -; GFX10-GISEL-LABEL: v_mul_f64_select_n1_n64: -; GFX10-GISEL: ; %bb.0: -; GFX10-GISEL-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX10-GISEL-NEXT: v_mov_b32_e32 v4, 0xc0500000 -; GFX10-GISEL-NEXT: v_cmp_eq_u32_e32 vcc_lo, 0, v0 -; GFX10-GISEL-NEXT: v_mov_b32_e32 v3, 0 -; GFX10-GISEL-NEXT: v_cndmask_b32_e64 v4, v4, 0xbff00000, vcc_lo -; GFX10-GISEL-NEXT: v_mul_f64 v[0:1], v[1:2], v[3:4] -; GFX10-GISEL-NEXT: s_setpc_b64 s[30:31] +} + +define double @v_mul_f64_select_1_64(i32 %arg, double %x) { +; GFX9-LABEL: v_mul_f64_select_1_64: +; GFX9: ; %bb.0: +; GFX9-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX9-NEXT: v_cmp_eq_u32_e32 vcc, 0, v0 +; GFX9-NEXT: v_cndmask_b32_e64 v0, 6, 0, vcc +; GFX9-NEXT: v_ldexp_f64 v[0:1], v[1:2], v0 +; GFX9-NEXT: s_setpc_b64 s[30:31] ; -; GFX11-SDAG-LABEL: v_mul_f64_select_n1_n64: -; GFX11-SDAG: ; %bb.0: -; GFX11-SDAG-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX11-SDAG-NEXT: v_cmp_eq_u32_e32 vcc_lo, 0, v0 -; GFX11-SDAG-NEXT: v_cndmask_b32_e64 v0, 6, 0, vcc_lo -; GFX11-SDAG-NEXT: v_ldexp_f64 v[0:1], -v[1:2], v0 -; GFX11-SDAG-NEXT: s_setpc_b64 s[30:31] +; GFX1011-LABEL: v_mul_f64_select_1_64: +; GFX1011: ; %bb.0: +; GFX1011-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX1011-NEXT: v_cmp_eq_u32_e32 vcc_lo, 0, v0 +; GFX1011-NEXT: v_cndmask_b32_e64 v0, 6, 0, vcc_lo +; GFX1011-NEXT: v_ldexp_f64 v[0:1], v[1:2], v0 +; GFX1011-NEXT: s_setpc_b64 s[30:31] + %cond = icmp eq i32 %arg, 0 + %select.pow2 = select i1 %cond, double 1.0, double 64.0 + %mul = fmul double %x, %select.pow2 + ret double %mul +} + +define double @v_mul_f64_select_n1_n64(i32 %arg, double %x) { +; GFX9-LABEL: v_mul_f64_select_n1_n64: +; GFX9: ; %bb.0: +; GFX9-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX9-NEXT: v_cmp_eq_u32_e32 vcc, 0, v0 +; GFX9-NEXT: v_cndmask_b32_e64 v0, 6, 0, vcc +; GFX9-NEXT: v_ldexp_f64 v[0:1], -v[1:2], v0 +; GFX9-NEXT: s_setpc_b64 s[30:31] ; -; GFX11-GISEL-LABEL: 
v_mul_f64_select_n1_n64: -; GFX11-GISEL: ; %bb.0: -; GFX11-GISEL-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX11-GISEL-NEXT: v_dual_mov_b32 v4, 0xc0500000 :: v_dual_mov_b32 v3, 0 -; GFX11-GISEL-NEXT: v_cmp_eq_u32_e32 vcc_lo, 0, v0 -; GFX11-GISEL-NEXT: v_cndmask_b32_e64 v4, v4, 0xbff00000, vcc_lo -; GFX11-GISEL-NEXT: v_mul_f64 v[0:1], v[1:2], v[3:4] -; GFX11-GISEL-NEXT: s_setpc_b64 s[30:31] +; GFX1011-LABEL: v_mul_f64_select_n1_n64: +; GFX1011: ; %bb.0: +; GFX1011-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX1011-NEXT: v_cmp_eq_u32_e32 vcc_lo, 0, v0 +; GFX1011-NEXT: v_cndmask_b32_e64 v0, 6, 0, vcc_lo +; GFX1011-NEXT: v_ldexp_f64 v[0:1], -v[1:2], v0 +; GFX1011-NEXT: s_setpc_b64 s[30:31] %cond = icmp eq i32 %arg, 0 %select.pow2 = select i1 %cond, double -1.0, double -64.0 %mul = fmul double %x, %select.pow2 @@ -4122,12 +4050,10 @@ define double @v_mul_f64_select_128_64(i32 %arg, double %x) { ; GFX9-GISEL-LABEL: v_mul_f64_select_128_64: ; GFX9-GISEL: ; %bb.0: ; GFX9-GISEL-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX9-GISEL-NEXT: v_mov_b32_e32 v4, 0x40600000 -; GFX9-GISEL-NEXT: v_mov_b32_e32 v5, 0x40500000 ; GFX9-GISEL-NEXT: v_cmp_eq_u32_e32 vcc, 0, v0 -; GFX9-GISEL-NEXT: v_mov_b32_e32 v3, 0 -; GFX9-GISEL-NEXT: v_cndmask_b32_e32 v4, v5, v4, vcc -; GFX9-GISEL-NEXT: v_mul_f64 v[0:1], v[1:2], v[3:4] +; GFX9-GISEL-NEXT: v_cndmask_b32_e64 v0, 0, 1, vcc +; GFX9-GISEL-NEXT: v_add_u32_e32 v0, 6, v0 +; GFX9-GISEL-NEXT: v_ldexp_f64 v[0:1], v[1:2], v0 ; GFX9-GISEL-NEXT: s_setpc_b64 s[30:31] ; ; GFX10-SDAG-LABEL: v_mul_f64_select_128_64: @@ -4141,11 +4067,10 @@ define double @v_mul_f64_select_128_64(i32 %arg, double %x) { ; GFX10-GISEL-LABEL: v_mul_f64_select_128_64: ; GFX10-GISEL: ; %bb.0: ; GFX10-GISEL-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX10-GISEL-NEXT: v_mov_b32_e32 v4, 0x40500000 ; GFX10-GISEL-NEXT: v_cmp_eq_u32_e32 vcc_lo, 0, v0 -; GFX10-GISEL-NEXT: v_mov_b32_e32 v3, 0 -; GFX10-GISEL-NEXT: v_cndmask_b32_e64 v4, v4, 0x40600000, vcc_lo -; 
GFX10-GISEL-NEXT: v_mul_f64 v[0:1], v[1:2], v[3:4] +; GFX10-GISEL-NEXT: v_cndmask_b32_e64 v0, 0, 1, vcc_lo +; GFX10-GISEL-NEXT: v_add_nc_u32_e32 v0, 6, v0 +; GFX10-GISEL-NEXT: v_ldexp_f64 v[0:1], v[1:2], v0 ; GFX10-GISEL-NEXT: s_setpc_b64 s[30:31] ; ; GFX11-SDAG-LABEL: v_mul_f64_select_128_64: @@ -4159,10 +4084,10 @@ define double @v_mul_f64_select_128_64(i32 %arg, double %x) { ; GFX11-GISEL-LABEL: v_mul_f64_select_128_64: ; GFX11-GISEL: ; %bb.0: ; GFX11-GISEL-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX11-GISEL-NEXT: v_dual_mov_b32 v4, 0x40500000 :: v_dual_mov_b32 v3, 0 ; GFX11-GISEL-NEXT: v_cmp_eq_u32_e32 vcc_lo, 0, v0 -; GFX11-GISEL-NEXT: v_cndmask_b32_e64 v4, v4, 0x40600000, vcc_lo -; GFX11-GISEL-NEXT: v_mul_f64 v[0:1], v[1:2], v[3:4] +; GFX11-GISEL-NEXT: v_cndmask_b32_e64 v0, 0, 1, vcc_lo +; GFX11-GISEL-NEXT: v_add_nc_u32_e32 v0, 6, v0 +; GFX11-GISEL-NEXT: v_ldexp_f64 v[0:1], v[1:2], v0 ; GFX11-GISEL-NEXT: s_setpc_b64 s[30:31] %cond = icmp eq i32 %arg, 0 %select.pow2 = select i1 %cond, double 128.0, double 64.0 @@ -4182,12 +4107,10 @@ define double @v_mul_f64_select_n128_n64(i32 %arg, double %x) { ; GFX9-GISEL-LABEL: v_mul_f64_select_n128_n64: ; GFX9-GISEL: ; %bb.0: ; GFX9-GISEL-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX9-GISEL-NEXT: v_mov_b32_e32 v4, 0xc0600000 -; GFX9-GISEL-NEXT: v_mov_b32_e32 v5, 0xc0500000 ; GFX9-GISEL-NEXT: v_cmp_eq_u32_e32 vcc, 0, v0 -; GFX9-GISEL-NEXT: v_mov_b32_e32 v3, 0 -; GFX9-GISEL-NEXT: v_cndmask_b32_e32 v4, v5, v4, vcc -; GFX9-GISEL-NEXT: v_mul_f64 v[0:1], v[1:2], v[3:4] +; GFX9-GISEL-NEXT: v_cndmask_b32_e64 v0, 0, 1, vcc +; GFX9-GISEL-NEXT: v_add_u32_e32 v0, 6, v0 +; GFX9-GISEL-NEXT: v_ldexp_f64 v[0:1], -v[1:2], v0 ; GFX9-GISEL-NEXT: s_setpc_b64 s[30:31] ; ; GFX10-SDAG-LABEL: v_mul_f64_select_n128_n64: @@ -4201,11 +4124,10 @@ define double @v_mul_f64_select_n128_n64(i32 %arg, double %x) { ; GFX10-GISEL-LABEL: v_mul_f64_select_n128_n64: ; GFX10-GISEL: ; %bb.0: ; GFX10-GISEL-NEXT: s_waitcnt vmcnt(0) expcnt(0) 
lgkmcnt(0) -; GFX10-GISEL-NEXT: v_mov_b32_e32 v4, 0xc0500000 ; GFX10-GISEL-NEXT: v_cmp_eq_u32_e32 vcc_lo, 0, v0 -; GFX10-GISEL-NEXT: v_mov_b32_e32 v3, 0 -; GFX10-GISEL-NEXT: v_cndmask_b32_e64 v4, v4, 0xc0600000, vcc_lo -; GFX10-GISEL-NEXT: v_mul_f64 v[0:1], v[1:2], v[3:4] +; GFX10-GISEL-NEXT: v_cndmask_b32_e64 v0, 0, 1, vcc_lo +; GFX10-GISEL-NEXT: v_add_nc_u32_e32 v0, 6, v0 +; GFX10-GISEL-NEXT: v_ldexp_f64 v[0:1], -v[1:2], v0 ; GFX10-GISEL-NEXT: s_setpc_b64 s[30:31] ; ; GFX11-SDAG-LABEL: v_mul_f64_select_n128_n64: @@ -4219,10 +4141,10 @@ define double @v_mul_f64_select_n128_n64(i32 %arg, double %x) { ; GFX11-GISEL-LABEL: v_mul_f64_select_n128_n64: ; GFX11-GISEL: ; %bb.0: ; GFX11-GISEL-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX11-GISEL-NEXT: v_dual_mov_b32 v4, 0xc0500000 :: v_dual_mov_b32 v3, 0 ; GFX11-GISEL-NEXT: v_cmp_eq_u32_e32 vcc_lo, 0, v0 -; GFX11-GISEL-NEXT: v_cndmask_b32_e64 v4, v4, 0xc0600000, vcc_lo -; GFX11-GISEL-NEXT: v_mul_f64 v[0:1], v[1:2], v[3:4] +; GFX11-GISEL-NEXT: v_cndmask_b32_e64 v0, 0, 1, vcc_lo +; GFX11-GISEL-NEXT: v_add_nc_u32_e32 v0, 6, v0 +; GFX11-GISEL-NEXT: v_ldexp_f64 v[0:1], -v[1:2], v0 ; GFX11-GISEL-NEXT: s_setpc_b64 s[30:31] %cond = icmp eq i32 %arg, 0 %select.pow2 = select i1 %cond, double -128.0, double -64.0 @@ -4231,59 +4153,21 @@ define double @v_mul_f64_select_n128_n64(i32 %arg, double %x) { } define double @v_mul_f64_select_n128_n16(i32 %arg, double %x) { -; GFX9-SDAG-LABEL: v_mul_f64_select_n128_n16: -; GFX9-SDAG: ; %bb.0: -; GFX9-SDAG-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX9-SDAG-NEXT: v_cmp_eq_u32_e32 vcc, 0, v0 -; GFX9-SDAG-NEXT: v_cndmask_b32_e64 v0, 4, 7, vcc -; GFX9-SDAG-NEXT: v_ldexp_f64 v[0:1], -v[1:2], v0 -; GFX9-SDAG-NEXT: s_setpc_b64 s[30:31] -; -; GFX9-GISEL-LABEL: v_mul_f64_select_n128_n16: -; GFX9-GISEL: ; %bb.0: -; GFX9-GISEL-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX9-GISEL-NEXT: v_mov_b32_e32 v4, 0xc0600000 -; GFX9-GISEL-NEXT: v_mov_b32_e32 v5, 0xc0300000 -; GFX9-GISEL-NEXT: 
v_cmp_eq_u32_e32 vcc, 0, v0 -; GFX9-GISEL-NEXT: v_mov_b32_e32 v3, 0 -; GFX9-GISEL-NEXT: v_cndmask_b32_e32 v4, v5, v4, vcc -; GFX9-GISEL-NEXT: v_mul_f64 v[0:1], v[1:2], v[3:4] -; GFX9-GISEL-NEXT: s_setpc_b64 s[30:31] -; -; GFX10-SDAG-LABEL: v_mul_f64_select_n128_n16: -; GFX10-SDAG: ; %bb.0: -; GFX10-SDAG-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX10-SDAG-NEXT: v_cmp_eq_u32_e32 vcc_lo, 0, v0 -; GFX10-SDAG-NEXT: v_cndmask_b32_e64 v0, 4, 7, vcc_lo -; GFX10-SDAG-NEXT: v_ldexp_f64 v[0:1], -v[1:2], v0 -; GFX10-SDAG-NEXT: s_setpc_b64 s[30:31] -; -; GFX10-GISEL-LABEL: v_mul_f64_select_n128_n16: -; GFX10-GISEL: ; %bb.0: -; GFX10-GISEL-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX10-GISEL-NEXT: v_mov_b32_e32 v4, 0xc0300000 -; GFX10-GISEL-NEXT: v_cmp_eq_u32_e32 vcc_lo, 0, v0 -; GFX10-GISEL-NEXT: v_mov_b32_e32 v3, 0 -; GFX10-GISEL-NEXT: v_cndmask_b32_e64 v4, v4, 0xc0600000, vcc_lo -; GFX10-GISEL-NEXT: v_mul_f64 v[0:1], v[1:2], v[3:4] -; GFX10-GISEL-NEXT: s_setpc_b64 s[30:31] -; -; GFX11-SDAG-LABEL: v_mul_f64_select_n128_n16: -; GFX11-SDAG: ; %bb.0: -; GFX11-SDAG-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX11-SDAG-NEXT: v_cmp_eq_u32_e32 vcc_lo, 0, v0 -; GFX11-SDAG-NEXT: v_cndmask_b32_e64 v0, 4, 7, vcc_lo -; GFX11-SDAG-NEXT: v_ldexp_f64 v[0:1], -v[1:2], v0 -; GFX11-SDAG-NEXT: s_setpc_b64 s[30:31] +; GFX9-LABEL: v_mul_f64_select_n128_n16: +; GFX9: ; %bb.0: +; GFX9-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX9-NEXT: v_cmp_eq_u32_e32 vcc, 0, v0 +; GFX9-NEXT: v_cndmask_b32_e64 v0, 4, 7, vcc +; GFX9-NEXT: v_ldexp_f64 v[0:1], -v[1:2], v0 +; GFX9-NEXT: s_setpc_b64 s[30:31] ; -; GFX11-GISEL-LABEL: v_mul_f64_select_n128_n16: -; GFX11-GISEL: ; %bb.0: -; GFX11-GISEL-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX11-GISEL-NEXT: v_dual_mov_b32 v4, 0xc0300000 :: v_dual_mov_b32 v3, 0 -; GFX11-GISEL-NEXT: v_cmp_eq_u32_e32 vcc_lo, 0, v0 -; GFX11-GISEL-NEXT: v_cndmask_b32_e64 v4, v4, 0xc0600000, vcc_lo -; GFX11-GISEL-NEXT: v_mul_f64 v[0:1], v[1:2], v[3:4] -; 
GFX11-GISEL-NEXT: s_setpc_b64 s[30:31] +; GFX1011-LABEL: v_mul_f64_select_n128_n16: +; GFX1011: ; %bb.0: +; GFX1011-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX1011-NEXT: v_cmp_eq_u32_e32 vcc_lo, 0, v0 +; GFX1011-NEXT: v_cndmask_b32_e64 v0, 4, 7, vcc_lo +; GFX1011-NEXT: v_ldexp_f64 v[0:1], -v[1:2], v0 +; GFX1011-NEXT: s_setpc_b64 s[30:31] %cond = icmp eq i32 %arg, 0 %select.pow2 = select i1 %cond, double -128.0, double -16.0 %mul = fmul double %x, %select.pow2 @@ -4305,12 +4189,10 @@ define double @v_contract_mul_add_f64_select_64_1(i32 %arg, double %x, double %y ; GFX9-GISEL-LABEL: v_contract_mul_add_f64_select_64_1: ; GFX9-GISEL: ; %bb.0: ; GFX9-GISEL-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX9-GISEL-NEXT: v_mov_b32_e32 v6, 0x40500000 -; GFX9-GISEL-NEXT: v_mov_b32_e32 v7, 0x3ff00000 ; GFX9-GISEL-NEXT: v_cmp_eq_u32_e32 vcc, 0, v0 -; GFX9-GISEL-NEXT: v_mov_b32_e32 v5, 0 -; GFX9-GISEL-NEXT: v_cndmask_b32_e32 v6, v7, v6, vcc -; GFX9-GISEL-NEXT: v_fma_f64 v[0:1], v[1:2], v[5:6], v[3:4] +; GFX9-GISEL-NEXT: v_cndmask_b32_e64 v0, 0, 6, vcc +; GFX9-GISEL-NEXT: v_ldexp_f64 v[0:1], v[1:2], v0 +; GFX9-GISEL-NEXT: v_add_f64 v[0:1], v[0:1], v[3:4] ; GFX9-GISEL-NEXT: s_setpc_b64 s[30:31] ; ; GFX10-SDAG-LABEL: v_contract_mul_add_f64_select_64_1: @@ -4326,11 +4208,10 @@ define double @v_contract_mul_add_f64_select_64_1(i32 %arg, double %x, double %y ; GFX10-GISEL-LABEL: v_contract_mul_add_f64_select_64_1: ; GFX10-GISEL: ; %bb.0: ; GFX10-GISEL-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX10-GISEL-NEXT: v_mov_b32_e32 v6, 0x3ff00000 ; GFX10-GISEL-NEXT: v_cmp_eq_u32_e32 vcc_lo, 0, v0 -; GFX10-GISEL-NEXT: v_mov_b32_e32 v5, 0 -; GFX10-GISEL-NEXT: v_cndmask_b32_e64 v6, v6, 0x40500000, vcc_lo -; GFX10-GISEL-NEXT: v_fma_f64 v[0:1], v[1:2], v[5:6], v[3:4] +; GFX10-GISEL-NEXT: v_cndmask_b32_e64 v0, 0, 6, vcc_lo +; GFX10-GISEL-NEXT: v_ldexp_f64 v[0:1], v[1:2], v0 +; GFX10-GISEL-NEXT: v_add_f64 v[0:1], v[0:1], v[3:4] ; GFX10-GISEL-NEXT: s_setpc_b64 s[30:31] ; ; 
GFX11-SDAG-LABEL: v_contract_mul_add_f64_select_64_1: @@ -4345,10 +4226,10 @@ define double @v_contract_mul_add_f64_select_64_1(i32 %arg, double %x, double %y ; GFX11-GISEL-LABEL: v_contract_mul_add_f64_select_64_1: ; GFX11-GISEL: ; %bb.0: ; GFX11-GISEL-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX11-GISEL-NEXT: v_dual_mov_b32 v6, 0x3ff00000 :: v_dual_mov_b32 v5, 0 ; GFX11-GISEL-NEXT: v_cmp_eq_u32_e32 vcc_lo, 0, v0 -; GFX11-GISEL-NEXT: v_cndmask_b32_e64 v6, v6, 0x40500000, vcc_lo -; GFX11-GISEL-NEXT: v_fma_f64 v[0:1], v[1:2], v[5:6], v[3:4] +; GFX11-GISEL-NEXT: v_cndmask_b32_e64 v0, 0, 6, vcc_lo +; GFX11-GISEL-NEXT: v_ldexp_f64 v[0:1], v[1:2], v0 +; GFX11-GISEL-NEXT: v_add_f64 v[0:1], v[0:1], v[3:4] ; GFX11-GISEL-NEXT: s_setpc_b64 s[30:31] %cond = icmp eq i32 %arg, 0 %select.pow2 = select contract i1 %cond, double 64.0, double 1.0 @@ -4372,12 +4253,10 @@ define double @v_contract_mul_add_f64_select_1_64(i32 %arg, double %x, double %y ; GFX9-GISEL-LABEL: v_contract_mul_add_f64_select_1_64: ; GFX9-GISEL: ; %bb.0: ; GFX9-GISEL-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX9-GISEL-NEXT: v_mov_b32_e32 v6, 0x3ff00000 -; GFX9-GISEL-NEXT: v_mov_b32_e32 v7, 0x40500000 ; GFX9-GISEL-NEXT: v_cmp_eq_u32_e32 vcc, 0, v0 -; GFX9-GISEL-NEXT: v_mov_b32_e32 v5, 0 -; GFX9-GISEL-NEXT: v_cndmask_b32_e32 v6, v7, v6, vcc -; GFX9-GISEL-NEXT: v_fma_f64 v[0:1], v[1:2], v[5:6], v[3:4] +; GFX9-GISEL-NEXT: v_cndmask_b32_e64 v0, 6, 0, vcc +; GFX9-GISEL-NEXT: v_ldexp_f64 v[0:1], v[1:2], v0 +; GFX9-GISEL-NEXT: v_add_f64 v[0:1], v[0:1], v[3:4] ; GFX9-GISEL-NEXT: s_setpc_b64 s[30:31] ; ; GFX10-SDAG-LABEL: v_contract_mul_add_f64_select_1_64: @@ -4393,11 +4272,10 @@ define double @v_contract_mul_add_f64_select_1_64(i32 %arg, double %x, double %y ; GFX10-GISEL-LABEL: v_contract_mul_add_f64_select_1_64: ; GFX10-GISEL: ; %bb.0: ; GFX10-GISEL-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX10-GISEL-NEXT: v_mov_b32_e32 v6, 0x40500000 ; GFX10-GISEL-NEXT: v_cmp_eq_u32_e32 vcc_lo, 0, v0 -; 
GFX10-GISEL-NEXT: v_mov_b32_e32 v5, 0 -; GFX10-GISEL-NEXT: v_cndmask_b32_e64 v6, v6, 0x3ff00000, vcc_lo -; GFX10-GISEL-NEXT: v_fma_f64 v[0:1], v[1:2], v[5:6], v[3:4] +; GFX10-GISEL-NEXT: v_cndmask_b32_e64 v0, 6, 0, vcc_lo +; GFX10-GISEL-NEXT: v_ldexp_f64 v[0:1], v[1:2], v0 +; GFX10-GISEL-NEXT: v_add_f64 v[0:1], v[0:1], v[3:4] ; GFX10-GISEL-NEXT: s_setpc_b64 s[30:31] ; ; GFX11-SDAG-LABEL: v_contract_mul_add_f64_select_1_64: @@ -4412,10 +4290,10 @@ define double @v_contract_mul_add_f64_select_1_64(i32 %arg, double %x, double %y ; GFX11-GISEL-LABEL: v_contract_mul_add_f64_select_1_64: ; GFX11-GISEL: ; %bb.0: ; GFX11-GISEL-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX11-GISEL-NEXT: v_dual_mov_b32 v6, 0x40500000 :: v_dual_mov_b32 v5, 0 ; GFX11-GISEL-NEXT: v_cmp_eq_u32_e32 vcc_lo, 0, v0 -; GFX11-GISEL-NEXT: v_cndmask_b32_e64 v6, v6, 0x3ff00000, vcc_lo -; GFX11-GISEL-NEXT: v_fma_f64 v[0:1], v[1:2], v[5:6], v[3:4] +; GFX11-GISEL-NEXT: v_cndmask_b32_e64 v0, 6, 0, vcc_lo +; GFX11-GISEL-NEXT: v_ldexp_f64 v[0:1], v[1:2], v0 +; GFX11-GISEL-NEXT: v_add_f64 v[0:1], v[0:1], v[3:4] ; GFX11-GISEL-NEXT: s_setpc_b64 s[30:31] %cond = icmp eq i32 %arg, 0 %select.pow2 = select contract i1 %cond, double 1.0, double 64.0 @@ -4439,12 +4317,10 @@ define double @v_contract_mul_add_f64_select_n64_n1(i32 %arg, double %x, double ; GFX9-GISEL-LABEL: v_contract_mul_add_f64_select_n64_n1: ; GFX9-GISEL: ; %bb.0: ; GFX9-GISEL-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX9-GISEL-NEXT: v_mov_b32_e32 v6, 0xc0500000 -; GFX9-GISEL-NEXT: v_mov_b32_e32 v7, 0xbff00000 ; GFX9-GISEL-NEXT: v_cmp_eq_u32_e32 vcc, 0, v0 -; GFX9-GISEL-NEXT: v_mov_b32_e32 v5, 0 -; GFX9-GISEL-NEXT: v_cndmask_b32_e32 v6, v7, v6, vcc -; GFX9-GISEL-NEXT: v_fma_f64 v[0:1], v[1:2], v[5:6], v[3:4] +; GFX9-GISEL-NEXT: v_cndmask_b32_e64 v0, 0, 6, vcc +; GFX9-GISEL-NEXT: v_ldexp_f64 v[0:1], -v[1:2], v0 +; GFX9-GISEL-NEXT: v_add_f64 v[0:1], v[0:1], v[3:4] ; GFX9-GISEL-NEXT: s_setpc_b64 s[30:31] ; ; GFX10-SDAG-LABEL: 
v_contract_mul_add_f64_select_n64_n1: @@ -4460,11 +4336,10 @@ define double @v_contract_mul_add_f64_select_n64_n1(i32 %arg, double %x, double ; GFX10-GISEL-LABEL: v_contract_mul_add_f64_select_n64_n1: ; GFX10-GISEL: ; %bb.0: ; GFX10-GISEL-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX10-GISEL-NEXT: v_mov_b32_e32 v6, 0xbff00000 ; GFX10-GISEL-NEXT: v_cmp_eq_u32_e32 vcc_lo, 0, v0 -; GFX10-GISEL-NEXT: v_mov_b32_e32 v5, 0 -; GFX10-GISEL-NEXT: v_cndmask_b32_e64 v6, v6, 0xc0500000, vcc_lo -; GFX10-GISEL-NEXT: v_fma_f64 v[0:1], v[1:2], v[5:6], v[3:4] +; GFX10-GISEL-NEXT: v_cndmask_b32_e64 v0, 0, 6, vcc_lo +; GFX10-GISEL-NEXT: v_ldexp_f64 v[0:1], -v[1:2], v0 +; GFX10-GISEL-NEXT: v_add_f64 v[0:1], v[0:1], v[3:4] ; GFX10-GISEL-NEXT: s_setpc_b64 s[30:31] ; ; GFX11-SDAG-LABEL: v_contract_mul_add_f64_select_n64_n1: @@ -4479,10 +4354,10 @@ define double @v_contract_mul_add_f64_select_n64_n1(i32 %arg, double %x, double ; GFX11-GISEL-LABEL: v_contract_mul_add_f64_select_n64_n1: ; GFX11-GISEL: ; %bb.0: ; GFX11-GISEL-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX11-GISEL-NEXT: v_dual_mov_b32 v6, 0xbff00000 :: v_dual_mov_b32 v5, 0 ; GFX11-GISEL-NEXT: v_cmp_eq_u32_e32 vcc_lo, 0, v0 -; GFX11-GISEL-NEXT: v_cndmask_b32_e64 v6, v6, 0xc0500000, vcc_lo -; GFX11-GISEL-NEXT: v_fma_f64 v[0:1], v[1:2], v[5:6], v[3:4] +; GFX11-GISEL-NEXT: v_cndmask_b32_e64 v0, 0, 6, vcc_lo +; GFX11-GISEL-NEXT: v_ldexp_f64 v[0:1], -v[1:2], v0 +; GFX11-GISEL-NEXT: v_add_f64 v[0:1], v[0:1], v[3:4] ; GFX11-GISEL-NEXT: s_setpc_b64 s[30:31] %cond = icmp eq i32 %arg, 0 %select.pow2 = select contract i1 %cond, double -64.0, double -1.0 @@ -4506,12 +4381,10 @@ define double @v_contract_mul_add_f64_select_n1_n64(i32 %arg, double %x, double ; GFX9-GISEL-LABEL: v_contract_mul_add_f64_select_n1_n64: ; GFX9-GISEL: ; %bb.0: ; GFX9-GISEL-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX9-GISEL-NEXT: v_mov_b32_e32 v6, 0xbff00000 -; GFX9-GISEL-NEXT: v_mov_b32_e32 v7, 0xc0500000 ; GFX9-GISEL-NEXT: v_cmp_eq_u32_e32 
vcc, 0, v0 -; GFX9-GISEL-NEXT: v_mov_b32_e32 v5, 0 -; GFX9-GISEL-NEXT: v_cndmask_b32_e32 v6, v7, v6, vcc -; GFX9-GISEL-NEXT: v_fma_f64 v[0:1], v[1:2], v[5:6], v[3:4] +; GFX9-GISEL-NEXT: v_cndmask_b32_e64 v0, 6, 0, vcc +; GFX9-GISEL-NEXT: v_ldexp_f64 v[0:1], -v[1:2], v0 +; GFX9-GISEL-NEXT: v_add_f64 v[0:1], v[0:1], v[3:4] ; GFX9-GISEL-NEXT: s_setpc_b64 s[30:31] ; ; GFX10-SDAG-LABEL: v_contract_mul_add_f64_select_n1_n64: @@ -4527,11 +4400,10 @@ define double @v_contract_mul_add_f64_select_n1_n64(i32 %arg, double %x, double ; GFX10-GISEL-LABEL: v_contract_mul_add_f64_select_n1_n64: ; GFX10-GISEL: ; %bb.0: ; GFX10-GISEL-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX10-GISEL-NEXT: v_mov_b32_e32 v6, 0xc0500000 ; GFX10-GISEL-NEXT: v_cmp_eq_u32_e32 vcc_lo, 0, v0 -; GFX10-GISEL-NEXT: v_mov_b32_e32 v5, 0 -; GFX10-GISEL-NEXT: v_cndmask_b32_e64 v6, v6, 0xbff00000, vcc_lo -; GFX10-GISEL-NEXT: v_fma_f64 v[0:1], v[1:2], v[5:6], v[3:4] +; GFX10-GISEL-NEXT: v_cndmask_b32_e64 v0, 6, 0, vcc_lo +; GFX10-GISEL-NEXT: v_ldexp_f64 v[0:1], -v[1:2], v0 +; GFX10-GISEL-NEXT: v_add_f64 v[0:1], v[0:1], v[3:4] ; GFX10-GISEL-NEXT: s_setpc_b64 s[30:31] ; ; GFX11-SDAG-LABEL: v_contract_mul_add_f64_select_n1_n64: @@ -4546,10 +4418,10 @@ define double @v_contract_mul_add_f64_select_n1_n64(i32 %arg, double %x, double ; GFX11-GISEL-LABEL: v_contract_mul_add_f64_select_n1_n64: ; GFX11-GISEL: ; %bb.0: ; GFX11-GISEL-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX11-GISEL-NEXT: v_dual_mov_b32 v6, 0xc0500000 :: v_dual_mov_b32 v5, 0 ; GFX11-GISEL-NEXT: v_cmp_eq_u32_e32 vcc_lo, 0, v0 -; GFX11-GISEL-NEXT: v_cndmask_b32_e64 v6, v6, 0xbff00000, vcc_lo -; GFX11-GISEL-NEXT: v_fma_f64 v[0:1], v[1:2], v[5:6], v[3:4] +; GFX11-GISEL-NEXT: v_cndmask_b32_e64 v0, 6, 0, vcc_lo +; GFX11-GISEL-NEXT: v_ldexp_f64 v[0:1], -v[1:2], v0 +; GFX11-GISEL-NEXT: v_add_f64 v[0:1], v[0:1], v[3:4] ; GFX11-GISEL-NEXT: s_setpc_b64 s[30:31] %cond = icmp eq i32 %arg, 0 %select.pow2 = select contract i1 %cond, double -1.0, double 
-64.0 @@ -4573,12 +4445,11 @@ define double @v_contract_mul_add_f64_select_128_64(i32 %arg, double %x, double ; GFX9-GISEL-LABEL: v_contract_mul_add_f64_select_128_64: ; GFX9-GISEL: ; %bb.0: ; GFX9-GISEL-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX9-GISEL-NEXT: v_mov_b32_e32 v6, 0x40600000 -; GFX9-GISEL-NEXT: v_mov_b32_e32 v7, 0x40500000 ; GFX9-GISEL-NEXT: v_cmp_eq_u32_e32 vcc, 0, v0 -; GFX9-GISEL-NEXT: v_mov_b32_e32 v5, 0 -; GFX9-GISEL-NEXT: v_cndmask_b32_e32 v6, v7, v6, vcc -; GFX9-GISEL-NEXT: v_fma_f64 v[0:1], v[1:2], v[5:6], v[3:4] +; GFX9-GISEL-NEXT: v_cndmask_b32_e64 v0, 0, 1, vcc +; GFX9-GISEL-NEXT: v_add_u32_e32 v0, 6, v0 +; GFX9-GISEL-NEXT: v_ldexp_f64 v[0:1], v[1:2], v0 +; GFX9-GISEL-NEXT: v_add_f64 v[0:1], v[0:1], v[3:4] ; GFX9-GISEL-NEXT: s_setpc_b64 s[30:31] ; ; GFX10-SDAG-LABEL: v_contract_mul_add_f64_select_128_64: @@ -4594,11 +4465,11 @@ define double @v_contract_mul_add_f64_select_128_64(i32 %arg, double %x, double ; GFX10-GISEL-LABEL: v_contract_mul_add_f64_select_128_64: ; GFX10-GISEL: ; %bb.0: ; GFX10-GISEL-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX10-GISEL-NEXT: v_mov_b32_e32 v6, 0x40500000 ; GFX10-GISEL-NEXT: v_cmp_eq_u32_e32 vcc_lo, 0, v0 -; GFX10-GISEL-NEXT: v_mov_b32_e32 v5, 0 -; GFX10-GISEL-NEXT: v_cndmask_b32_e64 v6, v6, 0x40600000, vcc_lo -; GFX10-GISEL-NEXT: v_fma_f64 v[0:1], v[1:2], v[5:6], v[3:4] +; GFX10-GISEL-NEXT: v_cndmask_b32_e64 v0, 0, 1, vcc_lo +; GFX10-GISEL-NEXT: v_add_nc_u32_e32 v0, 6, v0 +; GFX10-GISEL-NEXT: v_ldexp_f64 v[0:1], v[1:2], v0 +; GFX10-GISEL-NEXT: v_add_f64 v[0:1], v[0:1], v[3:4] ; GFX10-GISEL-NEXT: s_setpc_b64 s[30:31] ; ; GFX11-SDAG-LABEL: v_contract_mul_add_f64_select_128_64: @@ -4613,10 +4484,11 @@ define double @v_contract_mul_add_f64_select_128_64(i32 %arg, double %x, double ; GFX11-GISEL-LABEL: v_contract_mul_add_f64_select_128_64: ; GFX11-GISEL: ; %bb.0: ; GFX11-GISEL-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX11-GISEL-NEXT: v_dual_mov_b32 v6, 0x40500000 :: v_dual_mov_b32 v5, 0 
; GFX11-GISEL-NEXT: v_cmp_eq_u32_e32 vcc_lo, 0, v0 -; GFX11-GISEL-NEXT: v_cndmask_b32_e64 v6, v6, 0x40600000, vcc_lo -; GFX11-GISEL-NEXT: v_fma_f64 v[0:1], v[1:2], v[5:6], v[3:4] +; GFX11-GISEL-NEXT: v_cndmask_b32_e64 v0, 0, 1, vcc_lo +; GFX11-GISEL-NEXT: v_add_nc_u32_e32 v0, 6, v0 +; GFX11-GISEL-NEXT: v_ldexp_f64 v[0:1], v[1:2], v0 +; GFX11-GISEL-NEXT: v_add_f64 v[0:1], v[0:1], v[3:4] ; GFX11-GISEL-NEXT: s_setpc_b64 s[30:31] %cond = icmp eq i32 %arg, 0 %select.pow2 = select i1 %cond, double 128.0, double 64.0 @@ -4640,12 +4512,10 @@ define double @v_contract_mul_add_f64_select_128_4(i32 %arg, double %x, double % ; GFX9-GISEL-LABEL: v_contract_mul_add_f64_select_128_4: ; GFX9-GISEL: ; %bb.0: ; GFX9-GISEL-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX9-GISEL-NEXT: v_mov_b32_e32 v6, 0x40600000 -; GFX9-GISEL-NEXT: v_mov_b32_e32 v7, 0x40100000 ; GFX9-GISEL-NEXT: v_cmp_eq_u32_e32 vcc, 0, v0 -; GFX9-GISEL-NEXT: v_mov_b32_e32 v5, 0 -; GFX9-GISEL-NEXT: v_cndmask_b32_e32 v6, v7, v6, vcc -; GFX9-GISEL-NEXT: v_fma_f64 v[0:1], v[1:2], v[5:6], v[3:4] +; GFX9-GISEL-NEXT: v_cndmask_b32_e64 v0, 2, 7, vcc +; GFX9-GISEL-NEXT: v_ldexp_f64 v[0:1], v[1:2], v0 +; GFX9-GISEL-NEXT: v_add_f64 v[0:1], v[0:1], v[3:4] ; GFX9-GISEL-NEXT: s_setpc_b64 s[30:31] ; ; GFX10-SDAG-LABEL: v_contract_mul_add_f64_select_128_4: @@ -4661,11 +4531,10 @@ define double @v_contract_mul_add_f64_select_128_4(i32 %arg, double %x, double % ; GFX10-GISEL-LABEL: v_contract_mul_add_f64_select_128_4: ; GFX10-GISEL: ; %bb.0: ; GFX10-GISEL-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX10-GISEL-NEXT: v_mov_b32_e32 v6, 0x40100000 ; GFX10-GISEL-NEXT: v_cmp_eq_u32_e32 vcc_lo, 0, v0 -; GFX10-GISEL-NEXT: v_mov_b32_e32 v5, 0 -; GFX10-GISEL-NEXT: v_cndmask_b32_e64 v6, v6, 0x40600000, vcc_lo -; GFX10-GISEL-NEXT: v_fma_f64 v[0:1], v[1:2], v[5:6], v[3:4] +; GFX10-GISEL-NEXT: v_cndmask_b32_e64 v0, 2, 7, vcc_lo +; GFX10-GISEL-NEXT: v_ldexp_f64 v[0:1], v[1:2], v0 +; GFX10-GISEL-NEXT: v_add_f64 v[0:1], v[0:1], v[3:4] ; 
GFX10-GISEL-NEXT: s_setpc_b64 s[30:31] ; ; GFX11-SDAG-LABEL: v_contract_mul_add_f64_select_128_4: @@ -4680,10 +4549,10 @@ define double @v_contract_mul_add_f64_select_128_4(i32 %arg, double %x, double % ; GFX11-GISEL-LABEL: v_contract_mul_add_f64_select_128_4: ; GFX11-GISEL: ; %bb.0: ; GFX11-GISEL-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX11-GISEL-NEXT: v_dual_mov_b32 v6, 0x40100000 :: v_dual_mov_b32 v5, 0 ; GFX11-GISEL-NEXT: v_cmp_eq_u32_e32 vcc_lo, 0, v0 -; GFX11-GISEL-NEXT: v_cndmask_b32_e64 v6, v6, 0x40600000, vcc_lo -; GFX11-GISEL-NEXT: v_fma_f64 v[0:1], v[1:2], v[5:6], v[3:4] +; GFX11-GISEL-NEXT: v_cndmask_b32_e64 v0, 2, 7, vcc_lo +; GFX11-GISEL-NEXT: v_ldexp_f64 v[0:1], v[1:2], v0 +; GFX11-GISEL-NEXT: v_add_f64 v[0:1], v[0:1], v[3:4] ; GFX11-GISEL-NEXT: s_setpc_b64 s[30:31] %cond = icmp eq i32 %arg, 0 %select.pow2 = select i1 %cond, double 128.0, double 4.0 @@ -4706,21 +4575,50 @@ define double @v_contract_mul_add_f64_select_2_4(i32 %arg, double %x, double %y) ; GFX9-GISEL-LABEL: v_contract_mul_add_f64_select_2_4: ; GFX9-GISEL: ; %bb.0: ; GFX9-GISEL-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX9-GISEL-NEXT: v_mov_b32_e32 v6, 0x40100000 ; GFX9-GISEL-NEXT: v_cmp_eq_u32_e32 vcc, 0, v0 -; GFX9-GISEL-NEXT: v_mov_b32_e32 v5, 0 -; GFX9-GISEL-NEXT: v_cndmask_b32_e64 v6, v6, 2.0, vcc -; GFX9-GISEL-NEXT: v_fma_f64 v[0:1], v[1:2], v[5:6], v[3:4] +; GFX9-GISEL-NEXT: v_cndmask_b32_e64 v0, 0, -1, vcc +; GFX9-GISEL-NEXT: v_add_u32_e32 v0, 2, v0 +; GFX9-GISEL-NEXT: v_ldexp_f64 v[0:1], v[1:2], v0 +; GFX9-GISEL-NEXT: v_add_f64 v[0:1], v[0:1], v[3:4] ; GFX9-GISEL-NEXT: s_setpc_b64 s[30:31] ; -; GFX1011-LABEL: v_contract_mul_add_f64_select_2_4: -; GFX1011: ; %bb.0: -; GFX1011-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX1011-NEXT: v_cmp_eq_u32_e32 vcc_lo, 0, v0 -; GFX1011-NEXT: v_mov_b32_e32 v5, 0 -; GFX1011-NEXT: v_cndmask_b32_e64 v6, 0x40100000, 2.0, vcc_lo -; GFX1011-NEXT: v_fma_f64 v[0:1], v[1:2], v[5:6], v[3:4] -; GFX1011-NEXT: s_setpc_b64 s[30:31] 
+; GFX10-SDAG-LABEL: v_contract_mul_add_f64_select_2_4: +; GFX10-SDAG: ; %bb.0: +; GFX10-SDAG-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX10-SDAG-NEXT: v_cmp_eq_u32_e32 vcc_lo, 0, v0 +; GFX10-SDAG-NEXT: v_mov_b32_e32 v5, 0 +; GFX10-SDAG-NEXT: v_cndmask_b32_e64 v6, 0x40100000, 2.0, vcc_lo +; GFX10-SDAG-NEXT: v_fma_f64 v[0:1], v[1:2], v[5:6], v[3:4] +; GFX10-SDAG-NEXT: s_setpc_b64 s[30:31] +; +; GFX10-GISEL-LABEL: v_contract_mul_add_f64_select_2_4: +; GFX10-GISEL: ; %bb.0: +; GFX10-GISEL-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX10-GISEL-NEXT: v_cmp_eq_u32_e32 vcc_lo, 0, v0 +; GFX10-GISEL-NEXT: v_cndmask_b32_e64 v0, 0, -1, vcc_lo +; GFX10-GISEL-NEXT: v_add_nc_u32_e32 v0, 2, v0 +; GFX10-GISEL-NEXT: v_ldexp_f64 v[0:1], v[1:2], v0 +; GFX10-GISEL-NEXT: v_add_f64 v[0:1], v[0:1], v[3:4] +; GFX10-GISEL-NEXT: s_setpc_b64 s[30:31] +; +; GFX11-SDAG-LABEL: v_contract_mul_add_f64_select_2_4: +; GFX11-SDAG: ; %bb.0: +; GFX11-SDAG-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX11-SDAG-NEXT: v_cmp_eq_u32_e32 vcc_lo, 0, v0 +; GFX11-SDAG-NEXT: v_mov_b32_e32 v5, 0 +; GFX11-SDAG-NEXT: v_cndmask_b32_e64 v6, 0x40100000, 2.0, vcc_lo +; GFX11-SDAG-NEXT: v_fma_f64 v[0:1], v[1:2], v[5:6], v[3:4] +; GFX11-SDAG-NEXT: s_setpc_b64 s[30:31] +; +; GFX11-GISEL-LABEL: v_contract_mul_add_f64_select_2_4: +; GFX11-GISEL: ; %bb.0: +; GFX11-GISEL-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX11-GISEL-NEXT: v_cmp_eq_u32_e32 vcc_lo, 0, v0 +; GFX11-GISEL-NEXT: v_cndmask_b32_e64 v0, 0, -1, vcc_lo +; GFX11-GISEL-NEXT: v_add_nc_u32_e32 v0, 2, v0 +; GFX11-GISEL-NEXT: v_ldexp_f64 v[0:1], v[1:2], v0 +; GFX11-GISEL-NEXT: v_add_f64 v[0:1], v[0:1], v[3:4] +; GFX11-GISEL-NEXT: s_setpc_b64 s[30:31] %cond = icmp eq i32 %arg, 0 %select.pow2 = select i1 %cond, double 2.0, double 4.0 %mul = fmul contract double %x, %select.pow2 @@ -4743,12 +4641,10 @@ define double @v_contract_mul_add_f64_select_4_128(i32 %arg, double %x, double % ; GFX9-GISEL-LABEL: v_contract_mul_add_f64_select_4_128: ; 
GFX9-GISEL: ; %bb.0: ; GFX9-GISEL-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX9-GISEL-NEXT: v_mov_b32_e32 v6, 0x40100000 -; GFX9-GISEL-NEXT: v_mov_b32_e32 v7, 0x40600000 ; GFX9-GISEL-NEXT: v_cmp_eq_u32_e32 vcc, 0, v0 -; GFX9-GISEL-NEXT: v_mov_b32_e32 v5, 0 -; GFX9-GISEL-NEXT: v_cndmask_b32_e32 v6, v7, v6, vcc -; GFX9-GISEL-NEXT: v_fma_f64 v[0:1], v[1:2], v[5:6], v[3:4] +; GFX9-GISEL-NEXT: v_cndmask_b32_e64 v0, 7, 2, vcc +; GFX9-GISEL-NEXT: v_ldexp_f64 v[0:1], v[1:2], v0 +; GFX9-GISEL-NEXT: v_add_f64 v[0:1], v[0:1], v[3:4] ; GFX9-GISEL-NEXT: s_setpc_b64 s[30:31] ; ; GFX10-SDAG-LABEL: v_contract_mul_add_f64_select_4_128: @@ -4764,11 +4660,10 @@ define double @v_contract_mul_add_f64_select_4_128(i32 %arg, double %x, double % ; GFX10-GISEL-LABEL: v_contract_mul_add_f64_select_4_128: ; GFX10-GISEL: ; %bb.0: ; GFX10-GISEL-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX10-GISEL-NEXT: v_mov_b32_e32 v6, 0x40600000 ; GFX10-GISEL-NEXT: v_cmp_eq_u32_e32 vcc_lo, 0, v0 -; GFX10-GISEL-NEXT: v_mov_b32_e32 v5, 0 -; GFX10-GISEL-NEXT: v_cndmask_b32_e64 v6, v6, 0x40100000, vcc_lo -; GFX10-GISEL-NEXT: v_fma_f64 v[0:1], v[1:2], v[5:6], v[3:4] +; GFX10-GISEL-NEXT: v_cndmask_b32_e64 v0, 7, 2, vcc_lo +; GFX10-GISEL-NEXT: v_ldexp_f64 v[0:1], v[1:2], v0 +; GFX10-GISEL-NEXT: v_add_f64 v[0:1], v[0:1], v[3:4] ; GFX10-GISEL-NEXT: s_setpc_b64 s[30:31] ; ; GFX11-SDAG-LABEL: v_contract_mul_add_f64_select_4_128: @@ -4783,10 +4678,10 @@ define double @v_contract_mul_add_f64_select_4_128(i32 %arg, double %x, double % ; GFX11-GISEL-LABEL: v_contract_mul_add_f64_select_4_128: ; GFX11-GISEL: ; %bb.0: ; GFX11-GISEL-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX11-GISEL-NEXT: v_dual_mov_b32 v6, 0x40600000 :: v_dual_mov_b32 v5, 0 ; GFX11-GISEL-NEXT: v_cmp_eq_u32_e32 vcc_lo, 0, v0 -; GFX11-GISEL-NEXT: v_cndmask_b32_e64 v6, v6, 0x40100000, vcc_lo -; GFX11-GISEL-NEXT: v_fma_f64 v[0:1], v[1:2], v[5:6], v[3:4] +; GFX11-GISEL-NEXT: v_cndmask_b32_e64 v0, 7, 2, vcc_lo +; GFX11-GISEL-NEXT: 
v_ldexp_f64 v[0:1], v[1:2], v0 +; GFX11-GISEL-NEXT: v_add_f64 v[0:1], v[0:1], v[3:4] ; GFX11-GISEL-NEXT: s_setpc_b64 s[30:31] %cond = icmp eq i32 %arg, 0 %select.pow2 = select i1 %cond, double 4.0, double 128.0 @@ -4796,57 +4691,21 @@ define double @v_contract_mul_add_f64_select_4_128(i32 %arg, double %x, double % } define half @v_mul_f16_select_64_1(i32 %arg, half %x) { -; GFX9-SDAG-LABEL: v_mul_f16_select_64_1: -; GFX9-SDAG: ; %bb.0: -; GFX9-SDAG-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX9-SDAG-NEXT: v_cmp_eq_u32_e32 vcc, 0, v0 -; GFX9-SDAG-NEXT: v_cndmask_b32_e64 v0, 0, 6, vcc -; GFX9-SDAG-NEXT: v_ldexp_f16_e32 v0, v1, v0 -; GFX9-SDAG-NEXT: s_setpc_b64 s[30:31] -; -; GFX9-GISEL-LABEL: v_mul_f16_select_64_1: -; GFX9-GISEL: ; %bb.0: -; GFX9-GISEL-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX9-GISEL-NEXT: v_mov_b32_e32 v2, 0x5400 -; GFX9-GISEL-NEXT: v_mov_b32_e32 v3, 0x3c00 -; GFX9-GISEL-NEXT: v_cmp_eq_u32_e32 vcc, 0, v0 -; GFX9-GISEL-NEXT: v_cndmask_b32_e32 v0, v3, v2, vcc -; GFX9-GISEL-NEXT: v_mul_f16_e32 v0, v1, v0 -; GFX9-GISEL-NEXT: s_setpc_b64 s[30:31] -; -; GFX10-SDAG-LABEL: v_mul_f16_select_64_1: -; GFX10-SDAG: ; %bb.0: -; GFX10-SDAG-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX10-SDAG-NEXT: v_cmp_eq_u32_e32 vcc_lo, 0, v0 -; GFX10-SDAG-NEXT: v_cndmask_b32_e64 v0, 0, 6, vcc_lo -; GFX10-SDAG-NEXT: v_ldexp_f16_e32 v0, v1, v0 -; GFX10-SDAG-NEXT: s_setpc_b64 s[30:31] -; -; GFX10-GISEL-LABEL: v_mul_f16_select_64_1: -; GFX10-GISEL: ; %bb.0: -; GFX10-GISEL-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX10-GISEL-NEXT: v_mov_b32_e32 v2, 0x3c00 -; GFX10-GISEL-NEXT: v_cmp_eq_u32_e32 vcc_lo, 0, v0 -; GFX10-GISEL-NEXT: v_cndmask_b32_e64 v0, v2, 0x5400, vcc_lo -; GFX10-GISEL-NEXT: v_mul_f16_e32 v0, v1, v0 -; GFX10-GISEL-NEXT: s_setpc_b64 s[30:31] -; -; GFX11-SDAG-LABEL: v_mul_f16_select_64_1: -; GFX11-SDAG: ; %bb.0: -; GFX11-SDAG-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX11-SDAG-NEXT: v_cmp_eq_u32_e32 vcc_lo, 0, v0 -; 
GFX11-SDAG-NEXT: v_cndmask_b32_e64 v0, 0, 6, vcc_lo -; GFX11-SDAG-NEXT: v_ldexp_f16_e32 v0, v1, v0 -; GFX11-SDAG-NEXT: s_setpc_b64 s[30:31] +; GFX9-LABEL: v_mul_f16_select_64_1: +; GFX9: ; %bb.0: +; GFX9-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX9-NEXT: v_cmp_eq_u32_e32 vcc, 0, v0 +; GFX9-NEXT: v_cndmask_b32_e64 v0, 0, 6, vcc +; GFX9-NEXT: v_ldexp_f16_e32 v0, v1, v0 +; GFX9-NEXT: s_setpc_b64 s[30:31] ; -; GFX11-GISEL-LABEL: v_mul_f16_select_64_1: -; GFX11-GISEL: ; %bb.0: -; GFX11-GISEL-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX11-GISEL-NEXT: v_mov_b32_e32 v2, 0x3c00 -; GFX11-GISEL-NEXT: v_cmp_eq_u32_e32 vcc_lo, 0, v0 -; GFX11-GISEL-NEXT: v_cndmask_b32_e64 v0, v2, 0x5400, vcc_lo -; GFX11-GISEL-NEXT: v_mul_f16_e32 v0, v1, v0 -; GFX11-GISEL-NEXT: s_setpc_b64 s[30:31] +; GFX1011-LABEL: v_mul_f16_select_64_1: +; GFX1011: ; %bb.0: +; GFX1011-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX1011-NEXT: v_cmp_eq_u32_e32 vcc_lo, 0, v0 +; GFX1011-NEXT: v_cndmask_b32_e64 v0, 0, 6, vcc_lo +; GFX1011-NEXT: v_ldexp_f16_e32 v0, v1, v0 +; GFX1011-NEXT: s_setpc_b64 s[30:31] %cond = icmp eq i32 %arg, 0 %select.pow2 = select i1 %cond, half 64.0, half 1.0 %mul = fmul half %x, %select.pow2 @@ -4854,57 +4713,21 @@ define half @v_mul_f16_select_64_1(i32 %arg, half %x) { } define half @v_mul_f16_select_1_64(i32 %arg, half %x) { -; GFX9-SDAG-LABEL: v_mul_f16_select_1_64: -; GFX9-SDAG: ; %bb.0: -; GFX9-SDAG-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX9-SDAG-NEXT: v_cmp_eq_u32_e32 vcc, 0, v0 -; GFX9-SDAG-NEXT: v_cndmask_b32_e64 v0, 6, 0, vcc -; GFX9-SDAG-NEXT: v_ldexp_f16_e32 v0, v1, v0 -; GFX9-SDAG-NEXT: s_setpc_b64 s[30:31] -; -; GFX9-GISEL-LABEL: v_mul_f16_select_1_64: -; GFX9-GISEL: ; %bb.0: -; GFX9-GISEL-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX9-GISEL-NEXT: v_mov_b32_e32 v2, 0x3c00 -; GFX9-GISEL-NEXT: v_mov_b32_e32 v3, 0x5400 -; GFX9-GISEL-NEXT: v_cmp_eq_u32_e32 vcc, 0, v0 -; GFX9-GISEL-NEXT: v_cndmask_b32_e32 v0, v3, v2, vcc -; GFX9-GISEL-NEXT: 
v_mul_f16_e32 v0, v1, v0 -; GFX9-GISEL-NEXT: s_setpc_b64 s[30:31] -; -; GFX10-SDAG-LABEL: v_mul_f16_select_1_64: -; GFX10-SDAG: ; %bb.0: -; GFX10-SDAG-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX10-SDAG-NEXT: v_cmp_eq_u32_e32 vcc_lo, 0, v0 -; GFX10-SDAG-NEXT: v_cndmask_b32_e64 v0, 6, 0, vcc_lo -; GFX10-SDAG-NEXT: v_ldexp_f16_e32 v0, v1, v0 -; GFX10-SDAG-NEXT: s_setpc_b64 s[30:31] -; -; GFX10-GISEL-LABEL: v_mul_f16_select_1_64: -; GFX10-GISEL: ; %bb.0: -; GFX10-GISEL-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX10-GISEL-NEXT: v_mov_b32_e32 v2, 0x5400 -; GFX10-GISEL-NEXT: v_cmp_eq_u32_e32 vcc_lo, 0, v0 -; GFX10-GISEL-NEXT: v_cndmask_b32_e64 v0, v2, 0x3c00, vcc_lo -; GFX10-GISEL-NEXT: v_mul_f16_e32 v0, v1, v0 -; GFX10-GISEL-NEXT: s_setpc_b64 s[30:31] -; -; GFX11-SDAG-LABEL: v_mul_f16_select_1_64: -; GFX11-SDAG: ; %bb.0: -; GFX11-SDAG-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX11-SDAG-NEXT: v_cmp_eq_u32_e32 vcc_lo, 0, v0 -; GFX11-SDAG-NEXT: v_cndmask_b32_e64 v0, 6, 0, vcc_lo -; GFX11-SDAG-NEXT: v_ldexp_f16_e32 v0, v1, v0 -; GFX11-SDAG-NEXT: s_setpc_b64 s[30:31] +; GFX9-LABEL: v_mul_f16_select_1_64: +; GFX9: ; %bb.0: +; GFX9-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX9-NEXT: v_cmp_eq_u32_e32 vcc, 0, v0 +; GFX9-NEXT: v_cndmask_b32_e64 v0, 6, 0, vcc +; GFX9-NEXT: v_ldexp_f16_e32 v0, v1, v0 +; GFX9-NEXT: s_setpc_b64 s[30:31] ; -; GFX11-GISEL-LABEL: v_mul_f16_select_1_64: -; GFX11-GISEL: ; %bb.0: -; GFX11-GISEL-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX11-GISEL-NEXT: v_mov_b32_e32 v2, 0x5400 -; GFX11-GISEL-NEXT: v_cmp_eq_u32_e32 vcc_lo, 0, v0 -; GFX11-GISEL-NEXT: v_cndmask_b32_e64 v0, v2, 0x3c00, vcc_lo -; GFX11-GISEL-NEXT: v_mul_f16_e32 v0, v1, v0 -; GFX11-GISEL-NEXT: s_setpc_b64 s[30:31] +; GFX1011-LABEL: v_mul_f16_select_1_64: +; GFX1011: ; %bb.0: +; GFX1011-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX1011-NEXT: v_cmp_eq_u32_e32 vcc_lo, 0, v0 +; GFX1011-NEXT: v_cndmask_b32_e64 v0, 6, 0, vcc_lo +; GFX1011-NEXT: 
v_ldexp_f16_e32 v0, v1, v0 +; GFX1011-NEXT: s_setpc_b64 s[30:31] %cond = icmp eq i32 %arg, 0 %select.pow2 = select i1 %cond, half 1.0, half 64.0 %mul = fmul half %x, %select.pow2 @@ -4912,57 +4735,21 @@ define half @v_mul_f16_select_1_64(i32 %arg, half %x) { } define half @v_mul_f16_select_n1_n64(i32 %arg, half %x) { -; GFX9-SDAG-LABEL: v_mul_f16_select_n1_n64: -; GFX9-SDAG: ; %bb.0: -; GFX9-SDAG-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX9-SDAG-NEXT: v_cmp_eq_u32_e32 vcc, 0, v0 -; GFX9-SDAG-NEXT: v_cndmask_b32_e64 v0, 6, 0, vcc -; GFX9-SDAG-NEXT: v_ldexp_f16_e64 v0, -v1, v0 -; GFX9-SDAG-NEXT: s_setpc_b64 s[30:31] -; -; GFX9-GISEL-LABEL: v_mul_f16_select_n1_n64: -; GFX9-GISEL: ; %bb.0: -; GFX9-GISEL-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX9-GISEL-NEXT: v_mov_b32_e32 v2, 0xbc00 -; GFX9-GISEL-NEXT: v_mov_b32_e32 v3, 0xd400 -; GFX9-GISEL-NEXT: v_cmp_eq_u32_e32 vcc, 0, v0 -; GFX9-GISEL-NEXT: v_cndmask_b32_e32 v0, v3, v2, vcc -; GFX9-GISEL-NEXT: v_mul_f16_e32 v0, v1, v0 -; GFX9-GISEL-NEXT: s_setpc_b64 s[30:31] -; -; GFX10-SDAG-LABEL: v_mul_f16_select_n1_n64: -; GFX10-SDAG: ; %bb.0: -; GFX10-SDAG-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX10-SDAG-NEXT: v_cmp_eq_u32_e32 vcc_lo, 0, v0 -; GFX10-SDAG-NEXT: v_cndmask_b32_e64 v0, 6, 0, vcc_lo -; GFX10-SDAG-NEXT: v_ldexp_f16_e64 v0, -v1, v0 -; GFX10-SDAG-NEXT: s_setpc_b64 s[30:31] -; -; GFX10-GISEL-LABEL: v_mul_f16_select_n1_n64: -; GFX10-GISEL: ; %bb.0: -; GFX10-GISEL-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX10-GISEL-NEXT: v_mov_b32_e32 v2, 0xd400 -; GFX10-GISEL-NEXT: v_cmp_eq_u32_e32 vcc_lo, 0, v0 -; GFX10-GISEL-NEXT: v_cndmask_b32_e64 v0, v2, 0xbc00, vcc_lo -; GFX10-GISEL-NEXT: v_mul_f16_e32 v0, v1, v0 -; GFX10-GISEL-NEXT: s_setpc_b64 s[30:31] -; -; GFX11-SDAG-LABEL: v_mul_f16_select_n1_n64: -; GFX11-SDAG: ; %bb.0: -; GFX11-SDAG-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX11-SDAG-NEXT: v_cmp_eq_u32_e32 vcc_lo, 0, v0 -; GFX11-SDAG-NEXT: v_cndmask_b32_e64 v0, 6, 0, vcc_lo -; 
GFX11-SDAG-NEXT: v_ldexp_f16_e64 v0, -v1, v0 -; GFX11-SDAG-NEXT: s_setpc_b64 s[30:31] +; GFX9-LABEL: v_mul_f16_select_n1_n64: +; GFX9: ; %bb.0: +; GFX9-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX9-NEXT: v_cmp_eq_u32_e32 vcc, 0, v0 +; GFX9-NEXT: v_cndmask_b32_e64 v0, 6, 0, vcc +; GFX9-NEXT: v_ldexp_f16_e64 v0, -v1, v0 +; GFX9-NEXT: s_setpc_b64 s[30:31] ; -; GFX11-GISEL-LABEL: v_mul_f16_select_n1_n64: -; GFX11-GISEL: ; %bb.0: -; GFX11-GISEL-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX11-GISEL-NEXT: v_mov_b32_e32 v2, 0xd400 -; GFX11-GISEL-NEXT: v_cmp_eq_u32_e32 vcc_lo, 0, v0 -; GFX11-GISEL-NEXT: v_cndmask_b32_e64 v0, v2, 0xbc00, vcc_lo -; GFX11-GISEL-NEXT: v_mul_f16_e32 v0, v1, v0 -; GFX11-GISEL-NEXT: s_setpc_b64 s[30:31] +; GFX1011-LABEL: v_mul_f16_select_n1_n64: +; GFX1011: ; %bb.0: +; GFX1011-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX1011-NEXT: v_cmp_eq_u32_e32 vcc_lo, 0, v0 +; GFX1011-NEXT: v_cndmask_b32_e64 v0, 6, 0, vcc_lo +; GFX1011-NEXT: v_ldexp_f16_e64 v0, -v1, v0 +; GFX1011-NEXT: s_setpc_b64 s[30:31] %cond = icmp eq i32 %arg, 0 %select.pow2 = select i1 %cond, half -1.0, half -64.0 %mul = fmul half %x, %select.pow2 @@ -4981,11 +4768,13 @@ define half @v_mul_f16_select_128_64(i32 %arg, half %x) { ; GFX9-GISEL-LABEL: v_mul_f16_select_128_64: ; GFX9-GISEL: ; %bb.0: ; GFX9-GISEL-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX9-GISEL-NEXT: v_mov_b32_e32 v2, 0x5800 -; GFX9-GISEL-NEXT: v_mov_b32_e32 v3, 0x5400 ; GFX9-GISEL-NEXT: v_cmp_eq_u32_e32 vcc, 0, v0 -; GFX9-GISEL-NEXT: v_cndmask_b32_e32 v0, v3, v2, vcc -; GFX9-GISEL-NEXT: v_mul_f16_e32 v0, v1, v0 +; GFX9-GISEL-NEXT: v_cndmask_b32_e64 v0, 0, 1, vcc +; GFX9-GISEL-NEXT: v_add_u32_e32 v0, 6, v0 +; GFX9-GISEL-NEXT: v_mov_b32_e32 v2, 0xffff8000 +; GFX9-GISEL-NEXT: v_mov_b32_e32 v3, 0x7fff +; GFX9-GISEL-NEXT: v_med3_i32 v0, v0, v2, v3 +; GFX9-GISEL-NEXT: v_ldexp_f16_e32 v0, v1, v0 ; GFX9-GISEL-NEXT: s_setpc_b64 s[30:31] ; ; GFX10-SDAG-LABEL: v_mul_f16_select_128_64: @@ -4999,10 
+4788,12 @@ define half @v_mul_f16_select_128_64(i32 %arg, half %x) { ; GFX10-GISEL-LABEL: v_mul_f16_select_128_64: ; GFX10-GISEL: ; %bb.0: ; GFX10-GISEL-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX10-GISEL-NEXT: v_mov_b32_e32 v2, 0x5400 ; GFX10-GISEL-NEXT: v_cmp_eq_u32_e32 vcc_lo, 0, v0 -; GFX10-GISEL-NEXT: v_cndmask_b32_e64 v0, v2, 0x5800, vcc_lo -; GFX10-GISEL-NEXT: v_mul_f16_e32 v0, v1, v0 +; GFX10-GISEL-NEXT: v_mov_b32_e32 v2, 0x7fff +; GFX10-GISEL-NEXT: v_cndmask_b32_e64 v0, 0, 1, vcc_lo +; GFX10-GISEL-NEXT: v_add_nc_u32_e32 v0, 6, v0 +; GFX10-GISEL-NEXT: v_med3_i32 v0, 0xffff8000, v0, v2 +; GFX10-GISEL-NEXT: v_ldexp_f16_e32 v0, v1, v0 ; GFX10-GISEL-NEXT: s_setpc_b64 s[30:31] ; ; GFX11-SDAG-LABEL: v_mul_f16_select_128_64: @@ -5016,10 +4807,12 @@ define half @v_mul_f16_select_128_64(i32 %arg, half %x) { ; GFX11-GISEL-LABEL: v_mul_f16_select_128_64: ; GFX11-GISEL: ; %bb.0: ; GFX11-GISEL-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX11-GISEL-NEXT: v_mov_b32_e32 v2, 0x5400 ; GFX11-GISEL-NEXT: v_cmp_eq_u32_e32 vcc_lo, 0, v0 -; GFX11-GISEL-NEXT: v_cndmask_b32_e64 v0, v2, 0x5800, vcc_lo -; GFX11-GISEL-NEXT: v_mul_f16_e32 v0, v1, v0 +; GFX11-GISEL-NEXT: v_mov_b32_e32 v2, 0x7fff +; GFX11-GISEL-NEXT: v_cndmask_b32_e64 v0, 0, 1, vcc_lo +; GFX11-GISEL-NEXT: v_add_nc_u32_e32 v0, 6, v0 +; GFX11-GISEL-NEXT: v_med3_i32 v0, 0xffff8000, v0, v2 +; GFX11-GISEL-NEXT: v_ldexp_f16_e32 v0, v1, v0 ; GFX11-GISEL-NEXT: s_setpc_b64 s[30:31] %cond = icmp eq i32 %arg, 0 %select.pow2 = select i1 %cond, half 128.0, half 64.0 @@ -5039,11 +4832,13 @@ define half @v_mul_f16_select_n128_n64(i32 %arg, half %x) { ; GFX9-GISEL-LABEL: v_mul_f16_select_n128_n64: ; GFX9-GISEL: ; %bb.0: ; GFX9-GISEL-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX9-GISEL-NEXT: v_mov_b32_e32 v2, 0xd800 -; GFX9-GISEL-NEXT: v_mov_b32_e32 v3, 0xd400 ; GFX9-GISEL-NEXT: v_cmp_eq_u32_e32 vcc, 0, v0 -; GFX9-GISEL-NEXT: v_cndmask_b32_e32 v0, v3, v2, vcc -; GFX9-GISEL-NEXT: v_mul_f16_e32 v0, v1, v0 +; 
GFX9-GISEL-NEXT: v_cndmask_b32_e64 v0, 0, 1, vcc +; GFX9-GISEL-NEXT: v_add_u32_e32 v0, 6, v0 +; GFX9-GISEL-NEXT: v_mov_b32_e32 v2, 0xffff8000 +; GFX9-GISEL-NEXT: v_mov_b32_e32 v3, 0x7fff +; GFX9-GISEL-NEXT: v_med3_i32 v0, v0, v2, v3 +; GFX9-GISEL-NEXT: v_ldexp_f16_e64 v0, -v1, v0 ; GFX9-GISEL-NEXT: s_setpc_b64 s[30:31] ; ; GFX10-SDAG-LABEL: v_mul_f16_select_n128_n64: @@ -5057,10 +4852,12 @@ define half @v_mul_f16_select_n128_n64(i32 %arg, half %x) { ; GFX10-GISEL-LABEL: v_mul_f16_select_n128_n64: ; GFX10-GISEL: ; %bb.0: ; GFX10-GISEL-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX10-GISEL-NEXT: v_mov_b32_e32 v2, 0xd400 ; GFX10-GISEL-NEXT: v_cmp_eq_u32_e32 vcc_lo, 0, v0 -; GFX10-GISEL-NEXT: v_cndmask_b32_e64 v0, v2, 0xd800, vcc_lo -; GFX10-GISEL-NEXT: v_mul_f16_e32 v0, v1, v0 +; GFX10-GISEL-NEXT: v_mov_b32_e32 v2, 0x7fff +; GFX10-GISEL-NEXT: v_cndmask_b32_e64 v0, 0, 1, vcc_lo +; GFX10-GISEL-NEXT: v_add_nc_u32_e32 v0, 6, v0 +; GFX10-GISEL-NEXT: v_med3_i32 v0, 0xffff8000, v0, v2 +; GFX10-GISEL-NEXT: v_ldexp_f16_e64 v0, -v1, v0 ; GFX10-GISEL-NEXT: s_setpc_b64 s[30:31] ; ; GFX11-SDAG-LABEL: v_mul_f16_select_n128_n64: @@ -5074,10 +4871,12 @@ define half @v_mul_f16_select_n128_n64(i32 %arg, half %x) { ; GFX11-GISEL-LABEL: v_mul_f16_select_n128_n64: ; GFX11-GISEL: ; %bb.0: ; GFX11-GISEL-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX11-GISEL-NEXT: v_mov_b32_e32 v2, 0xd400 ; GFX11-GISEL-NEXT: v_cmp_eq_u32_e32 vcc_lo, 0, v0 -; GFX11-GISEL-NEXT: v_cndmask_b32_e64 v0, v2, 0xd800, vcc_lo -; GFX11-GISEL-NEXT: v_mul_f16_e32 v0, v1, v0 +; GFX11-GISEL-NEXT: v_mov_b32_e32 v2, 0x7fff +; GFX11-GISEL-NEXT: v_cndmask_b32_e64 v0, 0, 1, vcc_lo +; GFX11-GISEL-NEXT: v_add_nc_u32_e32 v0, 6, v0 +; GFX11-GISEL-NEXT: v_med3_i32 v0, 0xffff8000, v0, v2 +; GFX11-GISEL-NEXT: v_ldexp_f16_e64 v0, -v1, v0 ; GFX11-GISEL-NEXT: s_setpc_b64 s[30:31] %cond = icmp eq i32 %arg, 0 %select.pow2 = select i1 %cond, half -128.0, half -64.0 @@ -5086,57 +4885,21 @@ define half 
@v_mul_f16_select_n128_n64(i32 %arg, half %x) { } define half @v_mul_f16_select_n128_n16(i32 %arg, half %x) { -; GFX9-SDAG-LABEL: v_mul_f16_select_n128_n16: -; GFX9-SDAG: ; %bb.0: -; GFX9-SDAG-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX9-SDAG-NEXT: v_cmp_eq_u32_e32 vcc, 0, v0 -; GFX9-SDAG-NEXT: v_cndmask_b32_e64 v0, 4, 7, vcc -; GFX9-SDAG-NEXT: v_ldexp_f16_e64 v0, -v1, v0 -; GFX9-SDAG-NEXT: s_setpc_b64 s[30:31] -; -; GFX9-GISEL-LABEL: v_mul_f16_select_n128_n16: -; GFX9-GISEL: ; %bb.0: -; GFX9-GISEL-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX9-GISEL-NEXT: v_mov_b32_e32 v2, 0xd800 -; GFX9-GISEL-NEXT: v_mov_b32_e32 v3, 0xcc00 -; GFX9-GISEL-NEXT: v_cmp_eq_u32_e32 vcc, 0, v0 -; GFX9-GISEL-NEXT: v_cndmask_b32_e32 v0, v3, v2, vcc -; GFX9-GISEL-NEXT: v_mul_f16_e32 v0, v1, v0 -; GFX9-GISEL-NEXT: s_setpc_b64 s[30:31] -; -; GFX10-SDAG-LABEL: v_mul_f16_select_n128_n16: -; GFX10-SDAG: ; %bb.0: -; GFX10-SDAG-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX10-SDAG-NEXT: v_cmp_eq_u32_e32 vcc_lo, 0, v0 -; GFX10-SDAG-NEXT: v_cndmask_b32_e64 v0, 4, 7, vcc_lo -; GFX10-SDAG-NEXT: v_ldexp_f16_e64 v0, -v1, v0 -; GFX10-SDAG-NEXT: s_setpc_b64 s[30:31] -; -; GFX10-GISEL-LABEL: v_mul_f16_select_n128_n16: -; GFX10-GISEL: ; %bb.0: -; GFX10-GISEL-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX10-GISEL-NEXT: v_mov_b32_e32 v2, 0xcc00 -; GFX10-GISEL-NEXT: v_cmp_eq_u32_e32 vcc_lo, 0, v0 -; GFX10-GISEL-NEXT: v_cndmask_b32_e64 v0, v2, 0xd800, vcc_lo -; GFX10-GISEL-NEXT: v_mul_f16_e32 v0, v1, v0 -; GFX10-GISEL-NEXT: s_setpc_b64 s[30:31] -; -; GFX11-SDAG-LABEL: v_mul_f16_select_n128_n16: -; GFX11-SDAG: ; %bb.0: -; GFX11-SDAG-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX11-SDAG-NEXT: v_cmp_eq_u32_e32 vcc_lo, 0, v0 -; GFX11-SDAG-NEXT: v_cndmask_b32_e64 v0, 4, 7, vcc_lo -; GFX11-SDAG-NEXT: v_ldexp_f16_e64 v0, -v1, v0 -; GFX11-SDAG-NEXT: s_setpc_b64 s[30:31] +; GFX9-LABEL: v_mul_f16_select_n128_n16: +; GFX9: ; %bb.0: +; GFX9-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; 
GFX9-NEXT: v_cmp_eq_u32_e32 vcc, 0, v0 +; GFX9-NEXT: v_cndmask_b32_e64 v0, 4, 7, vcc +; GFX9-NEXT: v_ldexp_f16_e64 v0, -v1, v0 +; GFX9-NEXT: s_setpc_b64 s[30:31] ; -; GFX11-GISEL-LABEL: v_mul_f16_select_n128_n16: -; GFX11-GISEL: ; %bb.0: -; GFX11-GISEL-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX11-GISEL-NEXT: v_mov_b32_e32 v2, 0xcc00 -; GFX11-GISEL-NEXT: v_cmp_eq_u32_e32 vcc_lo, 0, v0 -; GFX11-GISEL-NEXT: v_cndmask_b32_e64 v0, v2, 0xd800, vcc_lo -; GFX11-GISEL-NEXT: v_mul_f16_e32 v0, v1, v0 -; GFX11-GISEL-NEXT: s_setpc_b64 s[30:31] +; GFX1011-LABEL: v_mul_f16_select_n128_n16: +; GFX1011: ; %bb.0: +; GFX1011-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX1011-NEXT: v_cmp_eq_u32_e32 vcc_lo, 0, v0 +; GFX1011-NEXT: v_cndmask_b32_e64 v0, 4, 7, vcc_lo +; GFX1011-NEXT: v_ldexp_f16_e64 v0, -v1, v0 +; GFX1011-NEXT: s_setpc_b64 s[30:31] %cond = icmp eq i32 %arg, 0 %select.pow2 = select i1 %cond, half -128.0, half -16.0 %mul = fmul half %x, %select.pow2 @@ -5157,11 +4920,10 @@ define half @v_contract_mul_add_f16_select_64_1(i32 %arg, half %x, half %y) { ; GFX9-GISEL-LABEL: v_contract_mul_add_f16_select_64_1: ; GFX9-GISEL: ; %bb.0: ; GFX9-GISEL-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX9-GISEL-NEXT: v_mov_b32_e32 v3, 0x5400 -; GFX9-GISEL-NEXT: v_mov_b32_e32 v4, 0x3c00 ; GFX9-GISEL-NEXT: v_cmp_eq_u32_e32 vcc, 0, v0 -; GFX9-GISEL-NEXT: v_cndmask_b32_e32 v0, v4, v3, vcc -; GFX9-GISEL-NEXT: v_fma_f16 v0, v1, v0, v2 +; GFX9-GISEL-NEXT: v_cndmask_b32_e64 v0, 0, 6, vcc +; GFX9-GISEL-NEXT: v_ldexp_f16_e32 v0, v1, v0 +; GFX9-GISEL-NEXT: v_add_f16_e32 v0, v0, v2 ; GFX9-GISEL-NEXT: s_setpc_b64 s[30:31] ; ; GFX10-SDAG-LABEL: v_contract_mul_add_f16_select_64_1: @@ -5176,10 +4938,10 @@ define half @v_contract_mul_add_f16_select_64_1(i32 %arg, half %x, half %y) { ; GFX10-GISEL-LABEL: v_contract_mul_add_f16_select_64_1: ; GFX10-GISEL: ; %bb.0: ; GFX10-GISEL-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX10-GISEL-NEXT: v_mov_b32_e32 v3, 0x3c00 ; GFX10-GISEL-NEXT: 
v_cmp_eq_u32_e32 vcc_lo, 0, v0 -; GFX10-GISEL-NEXT: v_cndmask_b32_e64 v0, v3, 0x5400, vcc_lo -; GFX10-GISEL-NEXT: v_fma_f16 v0, v1, v0, v2 +; GFX10-GISEL-NEXT: v_cndmask_b32_e64 v0, 0, 6, vcc_lo +; GFX10-GISEL-NEXT: v_ldexp_f16_e32 v0, v1, v0 +; GFX10-GISEL-NEXT: v_add_f16_e32 v0, v0, v2 ; GFX10-GISEL-NEXT: s_setpc_b64 s[30:31] ; ; GFX11-SDAG-LABEL: v_contract_mul_add_f16_select_64_1: @@ -5194,10 +4956,10 @@ define half @v_contract_mul_add_f16_select_64_1(i32 %arg, half %x, half %y) { ; GFX11-GISEL-LABEL: v_contract_mul_add_f16_select_64_1: ; GFX11-GISEL: ; %bb.0: ; GFX11-GISEL-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX11-GISEL-NEXT: v_mov_b32_e32 v3, 0x3c00 ; GFX11-GISEL-NEXT: v_cmp_eq_u32_e32 vcc_lo, 0, v0 -; GFX11-GISEL-NEXT: v_cndmask_b32_e64 v0, v3, 0x5400, vcc_lo -; GFX11-GISEL-NEXT: v_fma_f16 v0, v1, v0, v2 +; GFX11-GISEL-NEXT: v_cndmask_b32_e64 v0, 0, 6, vcc_lo +; GFX11-GISEL-NEXT: v_ldexp_f16_e32 v0, v1, v0 +; GFX11-GISEL-NEXT: v_add_f16_e32 v0, v0, v2 ; GFX11-GISEL-NEXT: s_setpc_b64 s[30:31] %cond = icmp eq i32 %arg, 0 %select.pow2 = select contract i1 %cond, half 64.0, half 1.0 @@ -5220,11 +4982,10 @@ define half @v_contract_mul_add_f16_select_1_64(i32 %arg, half %x, half %y) { ; GFX9-GISEL-LABEL: v_contract_mul_add_f16_select_1_64: ; GFX9-GISEL: ; %bb.0: ; GFX9-GISEL-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX9-GISEL-NEXT: v_mov_b32_e32 v3, 0x3c00 -; GFX9-GISEL-NEXT: v_mov_b32_e32 v4, 0x5400 ; GFX9-GISEL-NEXT: v_cmp_eq_u32_e32 vcc, 0, v0 -; GFX9-GISEL-NEXT: v_cndmask_b32_e32 v0, v4, v3, vcc -; GFX9-GISEL-NEXT: v_fma_f16 v0, v1, v0, v2 +; GFX9-GISEL-NEXT: v_cndmask_b32_e64 v0, 6, 0, vcc +; GFX9-GISEL-NEXT: v_ldexp_f16_e32 v0, v1, v0 +; GFX9-GISEL-NEXT: v_add_f16_e32 v0, v0, v2 ; GFX9-GISEL-NEXT: s_setpc_b64 s[30:31] ; ; GFX10-SDAG-LABEL: v_contract_mul_add_f16_select_1_64: @@ -5239,10 +5000,10 @@ define half @v_contract_mul_add_f16_select_1_64(i32 %arg, half %x, half %y) { ; GFX10-GISEL-LABEL: v_contract_mul_add_f16_select_1_64: ; 
GFX10-GISEL: ; %bb.0: ; GFX10-GISEL-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX10-GISEL-NEXT: v_mov_b32_e32 v3, 0x5400 ; GFX10-GISEL-NEXT: v_cmp_eq_u32_e32 vcc_lo, 0, v0 -; GFX10-GISEL-NEXT: v_cndmask_b32_e64 v0, v3, 0x3c00, vcc_lo -; GFX10-GISEL-NEXT: v_fma_f16 v0, v1, v0, v2 +; GFX10-GISEL-NEXT: v_cndmask_b32_e64 v0, 6, 0, vcc_lo +; GFX10-GISEL-NEXT: v_ldexp_f16_e32 v0, v1, v0 +; GFX10-GISEL-NEXT: v_add_f16_e32 v0, v0, v2 ; GFX10-GISEL-NEXT: s_setpc_b64 s[30:31] ; ; GFX11-SDAG-LABEL: v_contract_mul_add_f16_select_1_64: @@ -5257,10 +5018,10 @@ define half @v_contract_mul_add_f16_select_1_64(i32 %arg, half %x, half %y) { ; GFX11-GISEL-LABEL: v_contract_mul_add_f16_select_1_64: ; GFX11-GISEL: ; %bb.0: ; GFX11-GISEL-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX11-GISEL-NEXT: v_mov_b32_e32 v3, 0x5400 ; GFX11-GISEL-NEXT: v_cmp_eq_u32_e32 vcc_lo, 0, v0 -; GFX11-GISEL-NEXT: v_cndmask_b32_e64 v0, v3, 0x3c00, vcc_lo -; GFX11-GISEL-NEXT: v_fma_f16 v0, v1, v0, v2 +; GFX11-GISEL-NEXT: v_cndmask_b32_e64 v0, 6, 0, vcc_lo +; GFX11-GISEL-NEXT: v_ldexp_f16_e32 v0, v1, v0 +; GFX11-GISEL-NEXT: v_add_f16_e32 v0, v0, v2 ; GFX11-GISEL-NEXT: s_setpc_b64 s[30:31] %cond = icmp eq i32 %arg, 0 %select.pow2 = select contract i1 %cond, half 1.0, half 64.0 @@ -5283,11 +5044,10 @@ define half @v_contract_mul_add_f16_select_n64_n1(i32 %arg, half %x, half %y) { ; GFX9-GISEL-LABEL: v_contract_mul_add_f16_select_n64_n1: ; GFX9-GISEL: ; %bb.0: ; GFX9-GISEL-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX9-GISEL-NEXT: v_mov_b32_e32 v3, 0xd400 -; GFX9-GISEL-NEXT: v_mov_b32_e32 v4, 0xbc00 ; GFX9-GISEL-NEXT: v_cmp_eq_u32_e32 vcc, 0, v0 -; GFX9-GISEL-NEXT: v_cndmask_b32_e32 v0, v4, v3, vcc -; GFX9-GISEL-NEXT: v_fma_f16 v0, v1, v0, v2 +; GFX9-GISEL-NEXT: v_cndmask_b32_e64 v0, 0, 6, vcc +; GFX9-GISEL-NEXT: v_ldexp_f16_e64 v0, -v1, v0 +; GFX9-GISEL-NEXT: v_add_f16_e32 v0, v0, v2 ; GFX9-GISEL-NEXT: s_setpc_b64 s[30:31] ; ; GFX10-SDAG-LABEL: v_contract_mul_add_f16_select_n64_n1: @@ 
-5302,10 +5062,10 @@ define half @v_contract_mul_add_f16_select_n64_n1(i32 %arg, half %x, half %y) { ; GFX10-GISEL-LABEL: v_contract_mul_add_f16_select_n64_n1: ; GFX10-GISEL: ; %bb.0: ; GFX10-GISEL-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX10-GISEL-NEXT: v_mov_b32_e32 v3, 0xbc00 ; GFX10-GISEL-NEXT: v_cmp_eq_u32_e32 vcc_lo, 0, v0 -; GFX10-GISEL-NEXT: v_cndmask_b32_e64 v0, v3, 0xd400, vcc_lo -; GFX10-GISEL-NEXT: v_fma_f16 v0, v1, v0, v2 +; GFX10-GISEL-NEXT: v_cndmask_b32_e64 v0, 0, 6, vcc_lo +; GFX10-GISEL-NEXT: v_ldexp_f16_e64 v0, -v1, v0 +; GFX10-GISEL-NEXT: v_add_f16_e32 v0, v0, v2 ; GFX10-GISEL-NEXT: s_setpc_b64 s[30:31] ; ; GFX11-SDAG-LABEL: v_contract_mul_add_f16_select_n64_n1: @@ -5320,10 +5080,10 @@ define half @v_contract_mul_add_f16_select_n64_n1(i32 %arg, half %x, half %y) { ; GFX11-GISEL-LABEL: v_contract_mul_add_f16_select_n64_n1: ; GFX11-GISEL: ; %bb.0: ; GFX11-GISEL-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX11-GISEL-NEXT: v_mov_b32_e32 v3, 0xbc00 ; GFX11-GISEL-NEXT: v_cmp_eq_u32_e32 vcc_lo, 0, v0 -; GFX11-GISEL-NEXT: v_cndmask_b32_e64 v0, v3, 0xd400, vcc_lo -; GFX11-GISEL-NEXT: v_fma_f16 v0, v1, v0, v2 +; GFX11-GISEL-NEXT: v_cndmask_b32_e64 v0, 0, 6, vcc_lo +; GFX11-GISEL-NEXT: v_ldexp_f16_e64 v0, -v1, v0 +; GFX11-GISEL-NEXT: v_add_f16_e32 v0, v0, v2 ; GFX11-GISEL-NEXT: s_setpc_b64 s[30:31] %cond = icmp eq i32 %arg, 0 %select.pow2 = select contract i1 %cond, half -64.0, half -1.0 @@ -5346,11 +5106,10 @@ define half @v_contract_mul_add_f16_select_n1_n64(i32 %arg, half %x, half %y) { ; GFX9-GISEL-LABEL: v_contract_mul_add_f16_select_n1_n64: ; GFX9-GISEL: ; %bb.0: ; GFX9-GISEL-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX9-GISEL-NEXT: v_mov_b32_e32 v3, 0xbc00 -; GFX9-GISEL-NEXT: v_mov_b32_e32 v4, 0xd400 ; GFX9-GISEL-NEXT: v_cmp_eq_u32_e32 vcc, 0, v0 -; GFX9-GISEL-NEXT: v_cndmask_b32_e32 v0, v4, v3, vcc -; GFX9-GISEL-NEXT: v_fma_f16 v0, v1, v0, v2 +; GFX9-GISEL-NEXT: v_cndmask_b32_e64 v0, 6, 0, vcc +; GFX9-GISEL-NEXT: 
v_ldexp_f16_e64 v0, -v1, v0 +; GFX9-GISEL-NEXT: v_add_f16_e32 v0, v0, v2 ; GFX9-GISEL-NEXT: s_setpc_b64 s[30:31] ; ; GFX10-SDAG-LABEL: v_contract_mul_add_f16_select_n1_n64: @@ -5365,10 +5124,10 @@ define half @v_contract_mul_add_f16_select_n1_n64(i32 %arg, half %x, half %y) { ; GFX10-GISEL-LABEL: v_contract_mul_add_f16_select_n1_n64: ; GFX10-GISEL: ; %bb.0: ; GFX10-GISEL-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX10-GISEL-NEXT: v_mov_b32_e32 v3, 0xd400 ; GFX10-GISEL-NEXT: v_cmp_eq_u32_e32 vcc_lo, 0, v0 -; GFX10-GISEL-NEXT: v_cndmask_b32_e64 v0, v3, 0xbc00, vcc_lo -; GFX10-GISEL-NEXT: v_fma_f16 v0, v1, v0, v2 +; GFX10-GISEL-NEXT: v_cndmask_b32_e64 v0, 6, 0, vcc_lo +; GFX10-GISEL-NEXT: v_ldexp_f16_e64 v0, -v1, v0 +; GFX10-GISEL-NEXT: v_add_f16_e32 v0, v0, v2 ; GFX10-GISEL-NEXT: s_setpc_b64 s[30:31] ; ; GFX11-SDAG-LABEL: v_contract_mul_add_f16_select_n1_n64: @@ -5383,10 +5142,10 @@ define half @v_contract_mul_add_f16_select_n1_n64(i32 %arg, half %x, half %y) { ; GFX11-GISEL-LABEL: v_contract_mul_add_f16_select_n1_n64: ; GFX11-GISEL: ; %bb.0: ; GFX11-GISEL-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX11-GISEL-NEXT: v_mov_b32_e32 v3, 0xd400 ; GFX11-GISEL-NEXT: v_cmp_eq_u32_e32 vcc_lo, 0, v0 -; GFX11-GISEL-NEXT: v_cndmask_b32_e64 v0, v3, 0xbc00, vcc_lo -; GFX11-GISEL-NEXT: v_fma_f16 v0, v1, v0, v2 +; GFX11-GISEL-NEXT: v_cndmask_b32_e64 v0, 6, 0, vcc_lo +; GFX11-GISEL-NEXT: v_ldexp_f16_e64 v0, -v1, v0 +; GFX11-GISEL-NEXT: v_add_f16_e32 v0, v0, v2 ; GFX11-GISEL-NEXT: s_setpc_b64 s[30:31] %cond = icmp eq i32 %arg, 0 %select.pow2 = select contract i1 %cond, half -1.0, half -64.0 @@ -5409,11 +5168,14 @@ define half @v_contract_mul_add_f16_select_128_64(i32 %arg, half %x, half %y) { ; GFX9-GISEL-LABEL: v_contract_mul_add_f16_select_128_64: ; GFX9-GISEL: ; %bb.0: ; GFX9-GISEL-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX9-GISEL-NEXT: v_mov_b32_e32 v3, 0x5800 -; GFX9-GISEL-NEXT: v_mov_b32_e32 v4, 0x5400 ; GFX9-GISEL-NEXT: v_cmp_eq_u32_e32 vcc, 0, v0 -; 
GFX9-GISEL-NEXT: v_cndmask_b32_e32 v0, v4, v3, vcc -; GFX9-GISEL-NEXT: v_fma_f16 v0, v1, v0, v2 +; GFX9-GISEL-NEXT: v_cndmask_b32_e64 v0, 0, 1, vcc +; GFX9-GISEL-NEXT: v_add_u32_e32 v0, 6, v0 +; GFX9-GISEL-NEXT: v_mov_b32_e32 v3, 0xffff8000 +; GFX9-GISEL-NEXT: v_mov_b32_e32 v4, 0x7fff +; GFX9-GISEL-NEXT: v_med3_i32 v0, v0, v3, v4 +; GFX9-GISEL-NEXT: v_ldexp_f16_e32 v0, v1, v0 +; GFX9-GISEL-NEXT: v_add_f16_e32 v0, v0, v2 ; GFX9-GISEL-NEXT: s_setpc_b64 s[30:31] ; ; GFX10-SDAG-LABEL: v_contract_mul_add_f16_select_128_64: @@ -5428,10 +5190,13 @@ define half @v_contract_mul_add_f16_select_128_64(i32 %arg, half %x, half %y) { ; GFX10-GISEL-LABEL: v_contract_mul_add_f16_select_128_64: ; GFX10-GISEL: ; %bb.0: ; GFX10-GISEL-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX10-GISEL-NEXT: v_mov_b32_e32 v3, 0x5400 ; GFX10-GISEL-NEXT: v_cmp_eq_u32_e32 vcc_lo, 0, v0 -; GFX10-GISEL-NEXT: v_cndmask_b32_e64 v0, v3, 0x5800, vcc_lo -; GFX10-GISEL-NEXT: v_fma_f16 v0, v1, v0, v2 +; GFX10-GISEL-NEXT: v_mov_b32_e32 v3, 0x7fff +; GFX10-GISEL-NEXT: v_cndmask_b32_e64 v0, 0, 1, vcc_lo +; GFX10-GISEL-NEXT: v_add_nc_u32_e32 v0, 6, v0 +; GFX10-GISEL-NEXT: v_med3_i32 v0, 0xffff8000, v0, v3 +; GFX10-GISEL-NEXT: v_ldexp_f16_e32 v0, v1, v0 +; GFX10-GISEL-NEXT: v_add_f16_e32 v0, v0, v2 ; GFX10-GISEL-NEXT: s_setpc_b64 s[30:31] ; ; GFX11-SDAG-LABEL: v_contract_mul_add_f16_select_128_64: @@ -5446,10 +5211,13 @@ define half @v_contract_mul_add_f16_select_128_64(i32 %arg, half %x, half %y) { ; GFX11-GISEL-LABEL: v_contract_mul_add_f16_select_128_64: ; GFX11-GISEL: ; %bb.0: ; GFX11-GISEL-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX11-GISEL-NEXT: v_mov_b32_e32 v3, 0x5400 ; GFX11-GISEL-NEXT: v_cmp_eq_u32_e32 vcc_lo, 0, v0 -; GFX11-GISEL-NEXT: v_cndmask_b32_e64 v0, v3, 0x5800, vcc_lo -; GFX11-GISEL-NEXT: v_fma_f16 v0, v1, v0, v2 +; GFX11-GISEL-NEXT: v_mov_b32_e32 v3, 0x7fff +; GFX11-GISEL-NEXT: v_cndmask_b32_e64 v0, 0, 1, vcc_lo +; GFX11-GISEL-NEXT: v_add_nc_u32_e32 v0, 6, v0 +; GFX11-GISEL-NEXT: 
v_med3_i32 v0, 0xffff8000, v0, v3 +; GFX11-GISEL-NEXT: v_ldexp_f16_e32 v0, v1, v0 +; GFX11-GISEL-NEXT: v_add_f16_e32 v0, v0, v2 ; GFX11-GISEL-NEXT: s_setpc_b64 s[30:31] %cond = icmp eq i32 %arg, 0 %select.pow2 = select i1 %cond, half 128.0, half 64.0 @@ -5472,11 +5240,10 @@ define half @v_contract_mul_add_f16_select_128_4(i32 %arg, half %x, half %y) { ; GFX9-GISEL-LABEL: v_contract_mul_add_f16_select_128_4: ; GFX9-GISEL: ; %bb.0: ; GFX9-GISEL-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX9-GISEL-NEXT: v_mov_b32_e32 v3, 0x5800 -; GFX9-GISEL-NEXT: v_mov_b32_e32 v4, 0x4400 ; GFX9-GISEL-NEXT: v_cmp_eq_u32_e32 vcc, 0, v0 -; GFX9-GISEL-NEXT: v_cndmask_b32_e32 v0, v4, v3, vcc -; GFX9-GISEL-NEXT: v_fma_f16 v0, v1, v0, v2 +; GFX9-GISEL-NEXT: v_cndmask_b32_e64 v0, 2, 7, vcc +; GFX9-GISEL-NEXT: v_ldexp_f16_e32 v0, v1, v0 +; GFX9-GISEL-NEXT: v_add_f16_e32 v0, v0, v2 ; GFX9-GISEL-NEXT: s_setpc_b64 s[30:31] ; ; GFX10-SDAG-LABEL: v_contract_mul_add_f16_select_128_4: @@ -5491,10 +5258,10 @@ define half @v_contract_mul_add_f16_select_128_4(i32 %arg, half %x, half %y) { ; GFX10-GISEL-LABEL: v_contract_mul_add_f16_select_128_4: ; GFX10-GISEL: ; %bb.0: ; GFX10-GISEL-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX10-GISEL-NEXT: v_mov_b32_e32 v3, 0x4400 ; GFX10-GISEL-NEXT: v_cmp_eq_u32_e32 vcc_lo, 0, v0 -; GFX10-GISEL-NEXT: v_cndmask_b32_e64 v0, v3, 0x5800, vcc_lo -; GFX10-GISEL-NEXT: v_fma_f16 v0, v1, v0, v2 +; GFX10-GISEL-NEXT: v_cndmask_b32_e64 v0, 2, 7, vcc_lo +; GFX10-GISEL-NEXT: v_ldexp_f16_e32 v0, v1, v0 +; GFX10-GISEL-NEXT: v_add_f16_e32 v0, v0, v2 ; GFX10-GISEL-NEXT: s_setpc_b64 s[30:31] ; ; GFX11-SDAG-LABEL: v_contract_mul_add_f16_select_128_4: @@ -5509,10 +5276,10 @@ define half @v_contract_mul_add_f16_select_128_4(i32 %arg, half %x, half %y) { ; GFX11-GISEL-LABEL: v_contract_mul_add_f16_select_128_4: ; GFX11-GISEL: ; %bb.0: ; GFX11-GISEL-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX11-GISEL-NEXT: v_mov_b32_e32 v3, 0x4400 ; GFX11-GISEL-NEXT: v_cmp_eq_u32_e32 
vcc_lo, 0, v0 -; GFX11-GISEL-NEXT: v_cndmask_b32_e64 v0, v3, 0x5800, vcc_lo -; GFX11-GISEL-NEXT: v_fma_f16 v0, v1, v0, v2 +; GFX11-GISEL-NEXT: v_cndmask_b32_e64 v0, 2, 7, vcc_lo +; GFX11-GISEL-NEXT: v_ldexp_f16_e32 v0, v1, v0 +; GFX11-GISEL-NEXT: v_add_f16_e32 v0, v0, v2 ; GFX11-GISEL-NEXT: s_setpc_b64 s[30:31] %cond = icmp eq i32 %arg, 0 %select.pow2 = select i1 %cond, half 128.0, half 4.0 @@ -5535,11 +5302,14 @@ define half @v_contract_mul_add_f16_select_2_4(i32 %arg, half %x, half %y) { ; GFX9-GISEL-LABEL: v_contract_mul_add_f16_select_2_4: ; GFX9-GISEL: ; %bb.0: ; GFX9-GISEL-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX9-GISEL-NEXT: v_mov_b32_e32 v3, 0x4000 -; GFX9-GISEL-NEXT: v_mov_b32_e32 v4, 0x4400 ; GFX9-GISEL-NEXT: v_cmp_eq_u32_e32 vcc, 0, v0 -; GFX9-GISEL-NEXT: v_cndmask_b32_e32 v0, v4, v3, vcc -; GFX9-GISEL-NEXT: v_fma_f16 v0, v1, v0, v2 +; GFX9-GISEL-NEXT: v_cndmask_b32_e64 v0, 0, -1, vcc +; GFX9-GISEL-NEXT: v_add_u32_e32 v0, 2, v0 +; GFX9-GISEL-NEXT: v_mov_b32_e32 v3, 0xffff8000 +; GFX9-GISEL-NEXT: v_mov_b32_e32 v4, 0x7fff +; GFX9-GISEL-NEXT: v_med3_i32 v0, v0, v3, v4 +; GFX9-GISEL-NEXT: v_ldexp_f16_e32 v0, v1, v0 +; GFX9-GISEL-NEXT: v_add_f16_e32 v0, v0, v2 ; GFX9-GISEL-NEXT: s_setpc_b64 s[30:31] ; ; GFX10-SDAG-LABEL: v_contract_mul_add_f16_select_2_4: @@ -5554,10 +5324,13 @@ define half @v_contract_mul_add_f16_select_2_4(i32 %arg, half %x, half %y) { ; GFX10-GISEL-LABEL: v_contract_mul_add_f16_select_2_4: ; GFX10-GISEL: ; %bb.0: ; GFX10-GISEL-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX10-GISEL-NEXT: v_mov_b32_e32 v3, 0x4400 ; GFX10-GISEL-NEXT: v_cmp_eq_u32_e32 vcc_lo, 0, v0 -; GFX10-GISEL-NEXT: v_cndmask_b32_e64 v0, v3, 0x4000, vcc_lo -; GFX10-GISEL-NEXT: v_fma_f16 v0, v1, v0, v2 +; GFX10-GISEL-NEXT: v_mov_b32_e32 v3, 0x7fff +; GFX10-GISEL-NEXT: v_cndmask_b32_e64 v0, 0, -1, vcc_lo +; GFX10-GISEL-NEXT: v_add_nc_u32_e32 v0, 2, v0 +; GFX10-GISEL-NEXT: v_med3_i32 v0, 0xffff8000, v0, v3 +; GFX10-GISEL-NEXT: v_ldexp_f16_e32 v0, v1, v0 +; 
GFX10-GISEL-NEXT: v_add_f16_e32 v0, v0, v2 ; GFX10-GISEL-NEXT: s_setpc_b64 s[30:31] ; ; GFX11-SDAG-LABEL: v_contract_mul_add_f16_select_2_4: @@ -5572,10 +5345,13 @@ define half @v_contract_mul_add_f16_select_2_4(i32 %arg, half %x, half %y) { ; GFX11-GISEL-LABEL: v_contract_mul_add_f16_select_2_4: ; GFX11-GISEL: ; %bb.0: ; GFX11-GISEL-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX11-GISEL-NEXT: v_mov_b32_e32 v3, 0x4400 ; GFX11-GISEL-NEXT: v_cmp_eq_u32_e32 vcc_lo, 0, v0 -; GFX11-GISEL-NEXT: v_cndmask_b32_e64 v0, v3, 0x4000, vcc_lo -; GFX11-GISEL-NEXT: v_fma_f16 v0, v1, v0, v2 +; GFX11-GISEL-NEXT: v_mov_b32_e32 v3, 0x7fff +; GFX11-GISEL-NEXT: v_cndmask_b32_e64 v0, 0, -1, vcc_lo +; GFX11-GISEL-NEXT: v_add_nc_u32_e32 v0, 2, v0 +; GFX11-GISEL-NEXT: v_med3_i32 v0, 0xffff8000, v0, v3 +; GFX11-GISEL-NEXT: v_ldexp_f16_e32 v0, v1, v0 +; GFX11-GISEL-NEXT: v_add_f16_e32 v0, v0, v2 ; GFX11-GISEL-NEXT: s_setpc_b64 s[30:31] %cond = icmp eq i32 %arg, 0 %select.pow2 = select i1 %cond, half 2.0, half 4.0 @@ -5598,11 +5374,10 @@ define half @v_contract_mul_add_f16_select_4_128(i32 %arg, half %x, half %y) { ; GFX9-GISEL-LABEL: v_contract_mul_add_f16_select_4_128: ; GFX9-GISEL: ; %bb.0: ; GFX9-GISEL-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX9-GISEL-NEXT: v_mov_b32_e32 v3, 0x4400 -; GFX9-GISEL-NEXT: v_mov_b32_e32 v4, 0x5800 ; GFX9-GISEL-NEXT: v_cmp_eq_u32_e32 vcc, 0, v0 -; GFX9-GISEL-NEXT: v_cndmask_b32_e32 v0, v4, v3, vcc -; GFX9-GISEL-NEXT: v_fma_f16 v0, v1, v0, v2 +; GFX9-GISEL-NEXT: v_cndmask_b32_e64 v0, 7, 2, vcc +; GFX9-GISEL-NEXT: v_ldexp_f16_e32 v0, v1, v0 +; GFX9-GISEL-NEXT: v_add_f16_e32 v0, v0, v2 ; GFX9-GISEL-NEXT: s_setpc_b64 s[30:31] ; ; GFX10-SDAG-LABEL: v_contract_mul_add_f16_select_4_128: @@ -5617,10 +5392,10 @@ define half @v_contract_mul_add_f16_select_4_128(i32 %arg, half %x, half %y) { ; GFX10-GISEL-LABEL: v_contract_mul_add_f16_select_4_128: ; GFX10-GISEL: ; %bb.0: ; GFX10-GISEL-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX10-GISEL-NEXT: 
v_mov_b32_e32 v3, 0x5800 ; GFX10-GISEL-NEXT: v_cmp_eq_u32_e32 vcc_lo, 0, v0 -; GFX10-GISEL-NEXT: v_cndmask_b32_e64 v0, v3, 0x4400, vcc_lo -; GFX10-GISEL-NEXT: v_fma_f16 v0, v1, v0, v2 +; GFX10-GISEL-NEXT: v_cndmask_b32_e64 v0, 7, 2, vcc_lo +; GFX10-GISEL-NEXT: v_ldexp_f16_e32 v0, v1, v0 +; GFX10-GISEL-NEXT: v_add_f16_e32 v0, v0, v2 ; GFX10-GISEL-NEXT: s_setpc_b64 s[30:31] ; ; GFX11-SDAG-LABEL: v_contract_mul_add_f16_select_4_128: @@ -5635,10 +5410,10 @@ define half @v_contract_mul_add_f16_select_4_128(i32 %arg, half %x, half %y) { ; GFX11-GISEL-LABEL: v_contract_mul_add_f16_select_4_128: ; GFX11-GISEL: ; %bb.0: ; GFX11-GISEL-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX11-GISEL-NEXT: v_mov_b32_e32 v3, 0x5800 ; GFX11-GISEL-NEXT: v_cmp_eq_u32_e32 vcc_lo, 0, v0 -; GFX11-GISEL-NEXT: v_cndmask_b32_e64 v0, v3, 0x4400, vcc_lo -; GFX11-GISEL-NEXT: v_fma_f16 v0, v1, v0, v2 +; GFX11-GISEL-NEXT: v_cndmask_b32_e64 v0, 7, 2, vcc_lo +; GFX11-GISEL-NEXT: v_ldexp_f16_e32 v0, v1, v0 +; GFX11-GISEL-NEXT: v_add_f16_e32 v0, v0, v2 ; GFX11-GISEL-NEXT: s_setpc_b64 s[30:31] %cond = icmp eq i32 %arg, 0 %select.pow2 = select i1 %cond, half 4.0, half 128.0 @@ -5664,15 +5439,13 @@ define <2 x half> @v_mul_v2f16_select_64_1(<2 x i32> %arg, <2 x half> %x) { ; GFX9-GISEL-LABEL: v_mul_v2f16_select_64_1: ; GFX9-GISEL: ; %bb.0: ; GFX9-GISEL-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX9-GISEL-NEXT: v_mov_b32_e32 v3, 0x5400 -; GFX9-GISEL-NEXT: v_mov_b32_e32 v4, 0x3c00 ; GFX9-GISEL-NEXT: v_cmp_eq_u32_e32 vcc, 0, v0 -; GFX9-GISEL-NEXT: v_cndmask_b32_e32 v0, v4, v3, vcc +; GFX9-GISEL-NEXT: v_cndmask_b32_e64 v0, 0, 6, vcc ; GFX9-GISEL-NEXT: v_cmp_eq_u32_e32 vcc, 0, v1 -; GFX9-GISEL-NEXT: v_cndmask_b32_e32 v1, v4, v3, vcc -; GFX9-GISEL-NEXT: v_and_b32_e32 v0, 0xffff, v0 +; GFX9-GISEL-NEXT: v_cndmask_b32_e64 v1, 0, 6, vcc +; GFX9-GISEL-NEXT: v_ldexp_f16_e32 v0, v2, v0 +; GFX9-GISEL-NEXT: v_ldexp_f16_sdwa v1, v2, v1 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:WORD_1 src1_sel:DWORD ; 
GFX9-GISEL-NEXT: v_lshl_or_b32 v0, v1, 16, v0 -; GFX9-GISEL-NEXT: v_pk_mul_f16 v0, v2, v0 ; GFX9-GISEL-NEXT: s_setpc_b64 s[30:31] ; ; GFX10-SDAG-LABEL: v_mul_v2f16_select_64_1: @@ -5690,14 +5463,14 @@ define <2 x half> @v_mul_v2f16_select_64_1(<2 x i32> %arg, <2 x half> %x) { ; GFX10-GISEL-LABEL: v_mul_v2f16_select_64_1: ; GFX10-GISEL: ; %bb.0: ; GFX10-GISEL-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX10-GISEL-NEXT: v_mov_b32_e32 v3, 0x3c00 ; GFX10-GISEL-NEXT: v_cmp_eq_u32_e32 vcc_lo, 0, v0 -; GFX10-GISEL-NEXT: v_cndmask_b32_e64 v0, v3, 0x5400, vcc_lo +; GFX10-GISEL-NEXT: v_cndmask_b32_e64 v0, 0, 6, vcc_lo ; GFX10-GISEL-NEXT: v_cmp_eq_u32_e32 vcc_lo, 0, v1 +; GFX10-GISEL-NEXT: v_ldexp_f16_e32 v0, v2, v0 +; GFX10-GISEL-NEXT: v_cndmask_b32_e64 v1, 0, 6, vcc_lo ; GFX10-GISEL-NEXT: v_and_b32_e32 v0, 0xffff, v0 -; GFX10-GISEL-NEXT: v_cndmask_b32_e64 v1, v3, 0x5400, vcc_lo +; GFX10-GISEL-NEXT: v_ldexp_f16_sdwa v1, v2, v1 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:WORD_1 src1_sel:DWORD ; GFX10-GISEL-NEXT: v_lshl_or_b32 v0, v1, 16, v0 -; GFX10-GISEL-NEXT: v_pk_mul_f16 v0, v2, v0 ; GFX10-GISEL-NEXT: s_setpc_b64 s[30:31] ; ; GFX11-SDAG-LABEL: v_mul_v2f16_select_64_1: @@ -5715,14 +5488,15 @@ define <2 x half> @v_mul_v2f16_select_64_1(<2 x i32> %arg, <2 x half> %x) { ; GFX11-GISEL-LABEL: v_mul_v2f16_select_64_1: ; GFX11-GISEL: ; %bb.0: ; GFX11-GISEL-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX11-GISEL-NEXT: v_mov_b32_e32 v3, 0x3c00 ; GFX11-GISEL-NEXT: v_cmp_eq_u32_e32 vcc_lo, 0, v0 -; GFX11-GISEL-NEXT: v_cndmask_b32_e64 v0, v3, 0x5400, vcc_lo +; GFX11-GISEL-NEXT: v_lshrrev_b32_e32 v3, 16, v2 +; GFX11-GISEL-NEXT: v_cndmask_b32_e64 v0, 0, 6, vcc_lo ; GFX11-GISEL-NEXT: v_cmp_eq_u32_e32 vcc_lo, 0, v1 +; GFX11-GISEL-NEXT: v_ldexp_f16_e32 v0, v2, v0 +; GFX11-GISEL-NEXT: v_cndmask_b32_e64 v1, 0, 6, vcc_lo ; GFX11-GISEL-NEXT: v_and_b32_e32 v0, 0xffff, v0 -; GFX11-GISEL-NEXT: v_cndmask_b32_e64 v1, v3, 0x5400, vcc_lo +; GFX11-GISEL-NEXT: v_ldexp_f16_e32 v1, v3, v1 ; 
GFX11-GISEL-NEXT: v_lshl_or_b32 v0, v1, 16, v0 -; GFX11-GISEL-NEXT: v_pk_mul_f16 v0, v2, v0 ; GFX11-GISEL-NEXT: s_setpc_b64 s[30:31] %cond = icmp eq <2 x i32> %arg, zeroinitializer %select.pow2 = select <2 x i1> %cond, <2 x half> , <2 x half> @@ -5747,15 +5521,13 @@ define <2 x half> @v_mul_v2f16_select_1_64(<2 x i32> %arg, <2 x half> %x) { ; GFX9-GISEL-LABEL: v_mul_v2f16_select_1_64: ; GFX9-GISEL: ; %bb.0: ; GFX9-GISEL-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX9-GISEL-NEXT: v_mov_b32_e32 v3, 0x3c00 -; GFX9-GISEL-NEXT: v_mov_b32_e32 v4, 0x5400 ; GFX9-GISEL-NEXT: v_cmp_eq_u32_e32 vcc, 0, v0 -; GFX9-GISEL-NEXT: v_cndmask_b32_e32 v0, v4, v3, vcc +; GFX9-GISEL-NEXT: v_cndmask_b32_e64 v0, 6, 0, vcc ; GFX9-GISEL-NEXT: v_cmp_eq_u32_e32 vcc, 0, v1 -; GFX9-GISEL-NEXT: v_cndmask_b32_e32 v1, v4, v3, vcc -; GFX9-GISEL-NEXT: v_and_b32_e32 v0, 0xffff, v0 +; GFX9-GISEL-NEXT: v_cndmask_b32_e64 v1, 6, 0, vcc +; GFX9-GISEL-NEXT: v_ldexp_f16_e32 v0, v2, v0 +; GFX9-GISEL-NEXT: v_ldexp_f16_sdwa v1, v2, v1 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:WORD_1 src1_sel:DWORD ; GFX9-GISEL-NEXT: v_lshl_or_b32 v0, v1, 16, v0 -; GFX9-GISEL-NEXT: v_pk_mul_f16 v0, v2, v0 ; GFX9-GISEL-NEXT: s_setpc_b64 s[30:31] ; ; GFX10-SDAG-LABEL: v_mul_v2f16_select_1_64: @@ -5773,14 +5545,14 @@ define <2 x half> @v_mul_v2f16_select_1_64(<2 x i32> %arg, <2 x half> %x) { ; GFX10-GISEL-LABEL: v_mul_v2f16_select_1_64: ; GFX10-GISEL: ; %bb.0: ; GFX10-GISEL-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX10-GISEL-NEXT: v_mov_b32_e32 v3, 0x5400 ; GFX10-GISEL-NEXT: v_cmp_eq_u32_e32 vcc_lo, 0, v0 -; GFX10-GISEL-NEXT: v_cndmask_b32_e64 v0, v3, 0x3c00, vcc_lo +; GFX10-GISEL-NEXT: v_cndmask_b32_e64 v0, 6, 0, vcc_lo ; GFX10-GISEL-NEXT: v_cmp_eq_u32_e32 vcc_lo, 0, v1 +; GFX10-GISEL-NEXT: v_ldexp_f16_e32 v0, v2, v0 +; GFX10-GISEL-NEXT: v_cndmask_b32_e64 v1, 6, 0, vcc_lo ; GFX10-GISEL-NEXT: v_and_b32_e32 v0, 0xffff, v0 -; GFX10-GISEL-NEXT: v_cndmask_b32_e64 v1, v3, 0x3c00, vcc_lo +; GFX10-GISEL-NEXT: 
v_ldexp_f16_sdwa v1, v2, v1 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:WORD_1 src1_sel:DWORD ; GFX10-GISEL-NEXT: v_lshl_or_b32 v0, v1, 16, v0 -; GFX10-GISEL-NEXT: v_pk_mul_f16 v0, v2, v0 ; GFX10-GISEL-NEXT: s_setpc_b64 s[30:31] ; ; GFX11-SDAG-LABEL: v_mul_v2f16_select_1_64: @@ -5798,14 +5570,15 @@ define <2 x half> @v_mul_v2f16_select_1_64(<2 x i32> %arg, <2 x half> %x) { ; GFX11-GISEL-LABEL: v_mul_v2f16_select_1_64: ; GFX11-GISEL: ; %bb.0: ; GFX11-GISEL-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX11-GISEL-NEXT: v_mov_b32_e32 v3, 0x5400 ; GFX11-GISEL-NEXT: v_cmp_eq_u32_e32 vcc_lo, 0, v0 -; GFX11-GISEL-NEXT: v_cndmask_b32_e64 v0, v3, 0x3c00, vcc_lo +; GFX11-GISEL-NEXT: v_lshrrev_b32_e32 v3, 16, v2 +; GFX11-GISEL-NEXT: v_cndmask_b32_e64 v0, 6, 0, vcc_lo ; GFX11-GISEL-NEXT: v_cmp_eq_u32_e32 vcc_lo, 0, v1 +; GFX11-GISEL-NEXT: v_ldexp_f16_e32 v0, v2, v0 +; GFX11-GISEL-NEXT: v_cndmask_b32_e64 v1, 6, 0, vcc_lo ; GFX11-GISEL-NEXT: v_and_b32_e32 v0, 0xffff, v0 -; GFX11-GISEL-NEXT: v_cndmask_b32_e64 v1, v3, 0x3c00, vcc_lo +; GFX11-GISEL-NEXT: v_ldexp_f16_e32 v1, v3, v1 ; GFX11-GISEL-NEXT: v_lshl_or_b32 v0, v1, 16, v0 -; GFX11-GISEL-NEXT: v_pk_mul_f16 v0, v2, v0 ; GFX11-GISEL-NEXT: s_setpc_b64 s[30:31] %cond = icmp eq <2 x i32> %arg, zeroinitializer %select.pow2 = select <2 x i1> %cond, <2 x half> , <2 x half> @@ -5830,15 +5603,14 @@ define <2 x half> @v_mul_v2f16_select_n1_n64(<2 x i32> %arg, <2 x half> %x) { ; GFX9-GISEL-LABEL: v_mul_v2f16_select_n1_n64: ; GFX9-GISEL: ; %bb.0: ; GFX9-GISEL-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX9-GISEL-NEXT: v_mov_b32_e32 v3, 0xbc00 -; GFX9-GISEL-NEXT: v_mov_b32_e32 v4, 0xd400 ; GFX9-GISEL-NEXT: v_cmp_eq_u32_e32 vcc, 0, v0 -; GFX9-GISEL-NEXT: v_cndmask_b32_e32 v0, v4, v3, vcc +; GFX9-GISEL-NEXT: v_cndmask_b32_e64 v0, 6, 0, vcc ; GFX9-GISEL-NEXT: v_cmp_eq_u32_e32 vcc, 0, v1 -; GFX9-GISEL-NEXT: v_cndmask_b32_e32 v1, v4, v3, vcc -; GFX9-GISEL-NEXT: v_and_b32_e32 v0, 0xffff, v0 +; GFX9-GISEL-NEXT: v_xor_b32_e32 v2, 
0x80008000, v2 +; GFX9-GISEL-NEXT: v_cndmask_b32_e64 v1, 6, 0, vcc +; GFX9-GISEL-NEXT: v_ldexp_f16_e32 v0, v2, v0 +; GFX9-GISEL-NEXT: v_ldexp_f16_sdwa v1, v2, v1 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:WORD_1 src1_sel:DWORD ; GFX9-GISEL-NEXT: v_lshl_or_b32 v0, v1, 16, v0 -; GFX9-GISEL-NEXT: v_pk_mul_f16 v0, v2, v0 ; GFX9-GISEL-NEXT: s_setpc_b64 s[30:31] ; ; GFX10-SDAG-LABEL: v_mul_v2f16_select_n1_n64: @@ -5856,14 +5628,15 @@ define <2 x half> @v_mul_v2f16_select_n1_n64(<2 x i32> %arg, <2 x half> %x) { ; GFX10-GISEL-LABEL: v_mul_v2f16_select_n1_n64: ; GFX10-GISEL: ; %bb.0: ; GFX10-GISEL-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX10-GISEL-NEXT: v_mov_b32_e32 v3, 0xd400 ; GFX10-GISEL-NEXT: v_cmp_eq_u32_e32 vcc_lo, 0, v0 -; GFX10-GISEL-NEXT: v_cndmask_b32_e64 v0, v3, 0xbc00, vcc_lo +; GFX10-GISEL-NEXT: v_xor_b32_e32 v2, 0x80008000, v2 +; GFX10-GISEL-NEXT: v_cndmask_b32_e64 v0, 6, 0, vcc_lo ; GFX10-GISEL-NEXT: v_cmp_eq_u32_e32 vcc_lo, 0, v1 +; GFX10-GISEL-NEXT: v_ldexp_f16_e32 v0, v2, v0 +; GFX10-GISEL-NEXT: v_cndmask_b32_e64 v1, 6, 0, vcc_lo ; GFX10-GISEL-NEXT: v_and_b32_e32 v0, 0xffff, v0 -; GFX10-GISEL-NEXT: v_cndmask_b32_e64 v1, v3, 0xbc00, vcc_lo +; GFX10-GISEL-NEXT: v_ldexp_f16_sdwa v1, v2, v1 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:WORD_1 src1_sel:DWORD ; GFX10-GISEL-NEXT: v_lshl_or_b32 v0, v1, 16, v0 -; GFX10-GISEL-NEXT: v_pk_mul_f16 v0, v2, v0 ; GFX10-GISEL-NEXT: s_setpc_b64 s[30:31] ; ; GFX11-SDAG-LABEL: v_mul_v2f16_select_n1_n64: @@ -5881,14 +5654,16 @@ define <2 x half> @v_mul_v2f16_select_n1_n64(<2 x i32> %arg, <2 x half> %x) { ; GFX11-GISEL-LABEL: v_mul_v2f16_select_n1_n64: ; GFX11-GISEL: ; %bb.0: ; GFX11-GISEL-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX11-GISEL-NEXT: v_mov_b32_e32 v3, 0xd400 ; GFX11-GISEL-NEXT: v_cmp_eq_u32_e32 vcc_lo, 0, v0 -; GFX11-GISEL-NEXT: v_cndmask_b32_e64 v0, v3, 0xbc00, vcc_lo +; GFX11-GISEL-NEXT: v_xor_b32_e32 v2, 0x80008000, v2 +; GFX11-GISEL-NEXT: v_cndmask_b32_e64 v0, 6, 0, vcc_lo ; 
GFX11-GISEL-NEXT: v_cmp_eq_u32_e32 vcc_lo, 0, v1 +; GFX11-GISEL-NEXT: v_lshrrev_b32_e32 v3, 16, v2 +; GFX11-GISEL-NEXT: v_ldexp_f16_e32 v0, v2, v0 +; GFX11-GISEL-NEXT: v_cndmask_b32_e64 v1, 6, 0, vcc_lo ; GFX11-GISEL-NEXT: v_and_b32_e32 v0, 0xffff, v0 -; GFX11-GISEL-NEXT: v_cndmask_b32_e64 v1, v3, 0xbc00, vcc_lo +; GFX11-GISEL-NEXT: v_ldexp_f16_e32 v1, v3, v1 ; GFX11-GISEL-NEXT: v_lshl_or_b32 v0, v1, 16, v0 -; GFX11-GISEL-NEXT: v_pk_mul_f16 v0, v2, v0 ; GFX11-GISEL-NEXT: s_setpc_b64 s[30:31] %cond = icmp eq <2 x i32> %arg, zeroinitializer %select.pow2 = select <2 x i1> %cond, <2 x half> , <2 x half> @@ -5913,15 +5688,19 @@ define <2 x half> @v_mul_v2f16_select_128_64(<2 x i32> %arg, <2 x half> %x) { ; GFX9-GISEL-LABEL: v_mul_v2f16_select_128_64: ; GFX9-GISEL: ; %bb.0: ; GFX9-GISEL-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX9-GISEL-NEXT: v_mov_b32_e32 v3, 0x5800 -; GFX9-GISEL-NEXT: v_mov_b32_e32 v4, 0x5400 ; GFX9-GISEL-NEXT: v_cmp_eq_u32_e32 vcc, 0, v0 -; GFX9-GISEL-NEXT: v_cndmask_b32_e32 v0, v4, v3, vcc +; GFX9-GISEL-NEXT: v_cndmask_b32_e64 v0, 0, 1, vcc ; GFX9-GISEL-NEXT: v_cmp_eq_u32_e32 vcc, 0, v1 -; GFX9-GISEL-NEXT: v_cndmask_b32_e32 v1, v4, v3, vcc -; GFX9-GISEL-NEXT: v_and_b32_e32 v0, 0xffff, v0 +; GFX9-GISEL-NEXT: v_cndmask_b32_e64 v1, 0, 1, vcc +; GFX9-GISEL-NEXT: v_add_u32_e32 v0, 6, v0 +; GFX9-GISEL-NEXT: v_add_u32_e32 v1, 6, v1 +; GFX9-GISEL-NEXT: v_mov_b32_e32 v3, 0xffff8000 +; GFX9-GISEL-NEXT: v_mov_b32_e32 v4, 0x7fff +; GFX9-GISEL-NEXT: v_med3_i32 v0, v0, v3, v4 +; GFX9-GISEL-NEXT: v_med3_i32 v1, v1, v3, v4 +; GFX9-GISEL-NEXT: v_ldexp_f16_e32 v0, v2, v0 +; GFX9-GISEL-NEXT: v_ldexp_f16_sdwa v1, v2, v1 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:WORD_1 src1_sel:DWORD ; GFX9-GISEL-NEXT: v_lshl_or_b32 v0, v1, 16, v0 -; GFX9-GISEL-NEXT: v_pk_mul_f16 v0, v2, v0 ; GFX9-GISEL-NEXT: s_setpc_b64 s[30:31] ; ; GFX10-SDAG-LABEL: v_mul_v2f16_select_128_64: @@ -5939,14 +5718,19 @@ define <2 x half> @v_mul_v2f16_select_128_64(<2 x i32> %arg, <2 x half> %x) 
{ ; GFX10-GISEL-LABEL: v_mul_v2f16_select_128_64: ; GFX10-GISEL: ; %bb.0: ; GFX10-GISEL-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX10-GISEL-NEXT: v_mov_b32_e32 v3, 0x5400 ; GFX10-GISEL-NEXT: v_cmp_eq_u32_e32 vcc_lo, 0, v0 -; GFX10-GISEL-NEXT: v_cndmask_b32_e64 v0, v3, 0x5800, vcc_lo +; GFX10-GISEL-NEXT: v_mov_b32_e32 v3, 0x7fff +; GFX10-GISEL-NEXT: v_cndmask_b32_e64 v0, 0, 1, vcc_lo ; GFX10-GISEL-NEXT: v_cmp_eq_u32_e32 vcc_lo, 0, v1 +; GFX10-GISEL-NEXT: v_add_nc_u32_e32 v0, 6, v0 +; GFX10-GISEL-NEXT: v_cndmask_b32_e64 v1, 0, 1, vcc_lo +; GFX10-GISEL-NEXT: v_med3_i32 v0, 0xffff8000, v0, v3 +; GFX10-GISEL-NEXT: v_add_nc_u32_e32 v1, 6, v1 +; GFX10-GISEL-NEXT: v_ldexp_f16_e32 v0, v2, v0 +; GFX10-GISEL-NEXT: v_med3_i32 v1, 0xffff8000, v1, v3 ; GFX10-GISEL-NEXT: v_and_b32_e32 v0, 0xffff, v0 -; GFX10-GISEL-NEXT: v_cndmask_b32_e64 v1, v3, 0x5800, vcc_lo +; GFX10-GISEL-NEXT: v_ldexp_f16_sdwa v1, v2, v1 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:WORD_1 src1_sel:DWORD ; GFX10-GISEL-NEXT: v_lshl_or_b32 v0, v1, 16, v0 -; GFX10-GISEL-NEXT: v_pk_mul_f16 v0, v2, v0 ; GFX10-GISEL-NEXT: s_setpc_b64 s[30:31] ; ; GFX11-SDAG-LABEL: v_mul_v2f16_select_128_64: @@ -5964,14 +5748,20 @@ define <2 x half> @v_mul_v2f16_select_128_64(<2 x i32> %arg, <2 x half> %x) { ; GFX11-GISEL-LABEL: v_mul_v2f16_select_128_64: ; GFX11-GISEL: ; %bb.0: ; GFX11-GISEL-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX11-GISEL-NEXT: v_mov_b32_e32 v3, 0x5400 ; GFX11-GISEL-NEXT: v_cmp_eq_u32_e32 vcc_lo, 0, v0 -; GFX11-GISEL-NEXT: v_cndmask_b32_e64 v0, v3, 0x5800, vcc_lo +; GFX11-GISEL-NEXT: v_lshrrev_b32_e32 v4, 16, v2 +; GFX11-GISEL-NEXT: v_mov_b32_e32 v3, 0x7fff +; GFX11-GISEL-NEXT: v_cndmask_b32_e64 v0, 0, 1, vcc_lo ; GFX11-GISEL-NEXT: v_cmp_eq_u32_e32 vcc_lo, 0, v1 +; GFX11-GISEL-NEXT: v_add_nc_u32_e32 v0, 6, v0 +; GFX11-GISEL-NEXT: v_cndmask_b32_e64 v1, 0, 1, vcc_lo +; GFX11-GISEL-NEXT: v_med3_i32 v0, 0xffff8000, v0, v3 +; GFX11-GISEL-NEXT: v_add_nc_u32_e32 v1, 6, v1 +; GFX11-GISEL-NEXT: 
v_ldexp_f16_e32 v0, v2, v0 +; GFX11-GISEL-NEXT: v_med3_i32 v1, 0xffff8000, v1, v3 ; GFX11-GISEL-NEXT: v_and_b32_e32 v0, 0xffff, v0 -; GFX11-GISEL-NEXT: v_cndmask_b32_e64 v1, v3, 0x5800, vcc_lo +; GFX11-GISEL-NEXT: v_ldexp_f16_e32 v1, v4, v1 ; GFX11-GISEL-NEXT: v_lshl_or_b32 v0, v1, 16, v0 -; GFX11-GISEL-NEXT: v_pk_mul_f16 v0, v2, v0 ; GFX11-GISEL-NEXT: s_setpc_b64 s[30:31] %cond = icmp eq <2 x i32> %arg, zeroinitializer %select.pow2 = select <2 x i1> %cond, <2 x half> , <2 x half> @@ -5996,15 +5786,20 @@ define <2 x half> @v_mul_v2f16_select_n128_n64(<2 x i32> %arg, <2 x half> %x) { ; GFX9-GISEL-LABEL: v_mul_v2f16_select_n128_n64: ; GFX9-GISEL: ; %bb.0: ; GFX9-GISEL-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX9-GISEL-NEXT: v_mov_b32_e32 v3, 0xd800 -; GFX9-GISEL-NEXT: v_mov_b32_e32 v4, 0xd400 ; GFX9-GISEL-NEXT: v_cmp_eq_u32_e32 vcc, 0, v0 -; GFX9-GISEL-NEXT: v_cndmask_b32_e32 v0, v4, v3, vcc +; GFX9-GISEL-NEXT: v_cndmask_b32_e64 v0, 0, 1, vcc ; GFX9-GISEL-NEXT: v_cmp_eq_u32_e32 vcc, 0, v1 -; GFX9-GISEL-NEXT: v_cndmask_b32_e32 v1, v4, v3, vcc -; GFX9-GISEL-NEXT: v_and_b32_e32 v0, 0xffff, v0 +; GFX9-GISEL-NEXT: v_cndmask_b32_e64 v1, 0, 1, vcc +; GFX9-GISEL-NEXT: v_add_u32_e32 v0, 6, v0 +; GFX9-GISEL-NEXT: v_add_u32_e32 v1, 6, v1 +; GFX9-GISEL-NEXT: v_mov_b32_e32 v3, 0xffff8000 +; GFX9-GISEL-NEXT: v_mov_b32_e32 v4, 0x7fff +; GFX9-GISEL-NEXT: v_xor_b32_e32 v2, 0x80008000, v2 +; GFX9-GISEL-NEXT: v_med3_i32 v0, v0, v3, v4 +; GFX9-GISEL-NEXT: v_med3_i32 v1, v1, v3, v4 +; GFX9-GISEL-NEXT: v_ldexp_f16_e32 v0, v2, v0 +; GFX9-GISEL-NEXT: v_ldexp_f16_sdwa v1, v2, v1 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:WORD_1 src1_sel:DWORD ; GFX9-GISEL-NEXT: v_lshl_or_b32 v0, v1, 16, v0 -; GFX9-GISEL-NEXT: v_pk_mul_f16 v0, v2, v0 ; GFX9-GISEL-NEXT: s_setpc_b64 s[30:31] ; ; GFX10-SDAG-LABEL: v_mul_v2f16_select_n128_n64: @@ -6022,14 +5817,20 @@ define <2 x half> @v_mul_v2f16_select_n128_n64(<2 x i32> %arg, <2 x half> %x) { ; GFX10-GISEL-LABEL: v_mul_v2f16_select_n128_n64: ; 
GFX10-GISEL: ; %bb.0: ; GFX10-GISEL-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX10-GISEL-NEXT: v_mov_b32_e32 v3, 0xd400 ; GFX10-GISEL-NEXT: v_cmp_eq_u32_e32 vcc_lo, 0, v0 -; GFX10-GISEL-NEXT: v_cndmask_b32_e64 v0, v3, 0xd800, vcc_lo +; GFX10-GISEL-NEXT: v_mov_b32_e32 v3, 0x7fff +; GFX10-GISEL-NEXT: v_xor_b32_e32 v2, 0x80008000, v2 +; GFX10-GISEL-NEXT: v_cndmask_b32_e64 v0, 0, 1, vcc_lo ; GFX10-GISEL-NEXT: v_cmp_eq_u32_e32 vcc_lo, 0, v1 +; GFX10-GISEL-NEXT: v_add_nc_u32_e32 v0, 6, v0 +; GFX10-GISEL-NEXT: v_cndmask_b32_e64 v1, 0, 1, vcc_lo +; GFX10-GISEL-NEXT: v_med3_i32 v0, 0xffff8000, v0, v3 +; GFX10-GISEL-NEXT: v_add_nc_u32_e32 v1, 6, v1 +; GFX10-GISEL-NEXT: v_ldexp_f16_e32 v0, v2, v0 +; GFX10-GISEL-NEXT: v_med3_i32 v1, 0xffff8000, v1, v3 ; GFX10-GISEL-NEXT: v_and_b32_e32 v0, 0xffff, v0 -; GFX10-GISEL-NEXT: v_cndmask_b32_e64 v1, v3, 0xd800, vcc_lo +; GFX10-GISEL-NEXT: v_ldexp_f16_sdwa v1, v2, v1 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:WORD_1 src1_sel:DWORD ; GFX10-GISEL-NEXT: v_lshl_or_b32 v0, v1, 16, v0 -; GFX10-GISEL-NEXT: v_pk_mul_f16 v0, v2, v0 ; GFX10-GISEL-NEXT: s_setpc_b64 s[30:31] ; ; GFX11-SDAG-LABEL: v_mul_v2f16_select_n128_n64: @@ -6047,14 +5848,21 @@ define <2 x half> @v_mul_v2f16_select_n128_n64(<2 x i32> %arg, <2 x half> %x) { ; GFX11-GISEL-LABEL: v_mul_v2f16_select_n128_n64: ; GFX11-GISEL: ; %bb.0: ; GFX11-GISEL-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX11-GISEL-NEXT: v_mov_b32_e32 v3, 0xd400 ; GFX11-GISEL-NEXT: v_cmp_eq_u32_e32 vcc_lo, 0, v0 -; GFX11-GISEL-NEXT: v_cndmask_b32_e64 v0, v3, 0xd800, vcc_lo +; GFX11-GISEL-NEXT: v_xor_b32_e32 v2, 0x80008000, v2 +; GFX11-GISEL-NEXT: v_mov_b32_e32 v3, 0x7fff +; GFX11-GISEL-NEXT: v_cndmask_b32_e64 v0, 0, 1, vcc_lo ; GFX11-GISEL-NEXT: v_cmp_eq_u32_e32 vcc_lo, 0, v1 +; GFX11-GISEL-NEXT: v_lshrrev_b32_e32 v4, 16, v2 +; GFX11-GISEL-NEXT: v_add_nc_u32_e32 v0, 6, v0 +; GFX11-GISEL-NEXT: v_cndmask_b32_e64 v1, 0, 1, vcc_lo +; GFX11-GISEL-NEXT: v_med3_i32 v0, 0xffff8000, v0, v3 +; 
GFX11-GISEL-NEXT: v_add_nc_u32_e32 v1, 6, v1 +; GFX11-GISEL-NEXT: v_ldexp_f16_e32 v0, v2, v0 +; GFX11-GISEL-NEXT: v_med3_i32 v1, 0xffff8000, v1, v3 ; GFX11-GISEL-NEXT: v_and_b32_e32 v0, 0xffff, v0 -; GFX11-GISEL-NEXT: v_cndmask_b32_e64 v1, v3, 0xd800, vcc_lo +; GFX11-GISEL-NEXT: v_ldexp_f16_e32 v1, v4, v1 ; GFX11-GISEL-NEXT: v_lshl_or_b32 v0, v1, 16, v0 -; GFX11-GISEL-NEXT: v_pk_mul_f16 v0, v2, v0 ; GFX11-GISEL-NEXT: s_setpc_b64 s[30:31] %cond = icmp eq <2 x i32> %arg, zeroinitializer %select.pow2 = select <2 x i1> %cond, <2 x half> , <2 x half> @@ -6079,15 +5887,14 @@ define <2 x half> @v_mul_v2f16_select_n128_n16(<2 x i32> %arg, <2 x half> %x) { ; GFX9-GISEL-LABEL: v_mul_v2f16_select_n128_n16: ; GFX9-GISEL: ; %bb.0: ; GFX9-GISEL-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX9-GISEL-NEXT: v_mov_b32_e32 v3, 0xd800 -; GFX9-GISEL-NEXT: v_mov_b32_e32 v4, 0xcc00 ; GFX9-GISEL-NEXT: v_cmp_eq_u32_e32 vcc, 0, v0 -; GFX9-GISEL-NEXT: v_cndmask_b32_e32 v0, v4, v3, vcc +; GFX9-GISEL-NEXT: v_cndmask_b32_e64 v0, 4, 7, vcc ; GFX9-GISEL-NEXT: v_cmp_eq_u32_e32 vcc, 0, v1 -; GFX9-GISEL-NEXT: v_cndmask_b32_e32 v1, v4, v3, vcc -; GFX9-GISEL-NEXT: v_and_b32_e32 v0, 0xffff, v0 +; GFX9-GISEL-NEXT: v_xor_b32_e32 v2, 0x80008000, v2 +; GFX9-GISEL-NEXT: v_cndmask_b32_e64 v1, 4, 7, vcc +; GFX9-GISEL-NEXT: v_ldexp_f16_e32 v0, v2, v0 +; GFX9-GISEL-NEXT: v_ldexp_f16_sdwa v1, v2, v1 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:WORD_1 src1_sel:DWORD ; GFX9-GISEL-NEXT: v_lshl_or_b32 v0, v1, 16, v0 -; GFX9-GISEL-NEXT: v_pk_mul_f16 v0, v2, v0 ; GFX9-GISEL-NEXT: s_setpc_b64 s[30:31] ; ; GFX10-SDAG-LABEL: v_mul_v2f16_select_n128_n16: @@ -6105,14 +5912,15 @@ define <2 x half> @v_mul_v2f16_select_n128_n16(<2 x i32> %arg, <2 x half> %x) { ; GFX10-GISEL-LABEL: v_mul_v2f16_select_n128_n16: ; GFX10-GISEL: ; %bb.0: ; GFX10-GISEL-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX10-GISEL-NEXT: v_mov_b32_e32 v3, 0xcc00 ; GFX10-GISEL-NEXT: v_cmp_eq_u32_e32 vcc_lo, 0, v0 -; GFX10-GISEL-NEXT: 
v_cndmask_b32_e64 v0, v3, 0xd800, vcc_lo +; GFX10-GISEL-NEXT: v_xor_b32_e32 v2, 0x80008000, v2 +; GFX10-GISEL-NEXT: v_cndmask_b32_e64 v0, 4, 7, vcc_lo ; GFX10-GISEL-NEXT: v_cmp_eq_u32_e32 vcc_lo, 0, v1 +; GFX10-GISEL-NEXT: v_ldexp_f16_e32 v0, v2, v0 +; GFX10-GISEL-NEXT: v_cndmask_b32_e64 v1, 4, 7, vcc_lo ; GFX10-GISEL-NEXT: v_and_b32_e32 v0, 0xffff, v0 -; GFX10-GISEL-NEXT: v_cndmask_b32_e64 v1, v3, 0xd800, vcc_lo +; GFX10-GISEL-NEXT: v_ldexp_f16_sdwa v1, v2, v1 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:WORD_1 src1_sel:DWORD ; GFX10-GISEL-NEXT: v_lshl_or_b32 v0, v1, 16, v0 -; GFX10-GISEL-NEXT: v_pk_mul_f16 v0, v2, v0 ; GFX10-GISEL-NEXT: s_setpc_b64 s[30:31] ; ; GFX11-SDAG-LABEL: v_mul_v2f16_select_n128_n16: @@ -6130,14 +5938,16 @@ define <2 x half> @v_mul_v2f16_select_n128_n16(<2 x i32> %arg, <2 x half> %x) { ; GFX11-GISEL-LABEL: v_mul_v2f16_select_n128_n16: ; GFX11-GISEL: ; %bb.0: ; GFX11-GISEL-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX11-GISEL-NEXT: v_mov_b32_e32 v3, 0xcc00 ; GFX11-GISEL-NEXT: v_cmp_eq_u32_e32 vcc_lo, 0, v0 -; GFX11-GISEL-NEXT: v_cndmask_b32_e64 v0, v3, 0xd800, vcc_lo +; GFX11-GISEL-NEXT: v_xor_b32_e32 v2, 0x80008000, v2 +; GFX11-GISEL-NEXT: v_cndmask_b32_e64 v0, 4, 7, vcc_lo ; GFX11-GISEL-NEXT: v_cmp_eq_u32_e32 vcc_lo, 0, v1 +; GFX11-GISEL-NEXT: v_lshrrev_b32_e32 v3, 16, v2 +; GFX11-GISEL-NEXT: v_ldexp_f16_e32 v0, v2, v0 +; GFX11-GISEL-NEXT: v_cndmask_b32_e64 v1, 4, 7, vcc_lo ; GFX11-GISEL-NEXT: v_and_b32_e32 v0, 0xffff, v0 -; GFX11-GISEL-NEXT: v_cndmask_b32_e64 v1, v3, 0xd800, vcc_lo +; GFX11-GISEL-NEXT: v_ldexp_f16_e32 v1, v3, v1 ; GFX11-GISEL-NEXT: v_lshl_or_b32 v0, v1, 16, v0 -; GFX11-GISEL-NEXT: v_pk_mul_f16 v0, v2, v0 ; GFX11-GISEL-NEXT: s_setpc_b64 s[30:31] %cond = icmp eq <2 x i32> %arg, zeroinitializer %select.pow2 = select <2 x i1> %cond, <2 x half> , <2 x half> @@ -6162,15 +5972,14 @@ define <2 x half> @v_contract_mul_add_v2f16_select_64_1(<2 x i32> %arg, <2 x hal ; GFX9-GISEL-LABEL: v_contract_mul_add_v2f16_select_64_1: ; 
GFX9-GISEL: ; %bb.0: ; GFX9-GISEL-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX9-GISEL-NEXT: v_mov_b32_e32 v4, 0x5400 -; GFX9-GISEL-NEXT: v_mov_b32_e32 v5, 0x3c00 ; GFX9-GISEL-NEXT: v_cmp_eq_u32_e32 vcc, 0, v0 -; GFX9-GISEL-NEXT: v_cndmask_b32_e32 v0, v5, v4, vcc +; GFX9-GISEL-NEXT: v_cndmask_b32_e64 v0, 0, 6, vcc ; GFX9-GISEL-NEXT: v_cmp_eq_u32_e32 vcc, 0, v1 -; GFX9-GISEL-NEXT: v_cndmask_b32_e32 v1, v5, v4, vcc -; GFX9-GISEL-NEXT: v_and_b32_e32 v0, 0xffff, v0 +; GFX9-GISEL-NEXT: v_cndmask_b32_e64 v1, 0, 6, vcc +; GFX9-GISEL-NEXT: v_ldexp_f16_e32 v0, v2, v0 +; GFX9-GISEL-NEXT: v_ldexp_f16_sdwa v1, v2, v1 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:WORD_1 src1_sel:DWORD ; GFX9-GISEL-NEXT: v_lshl_or_b32 v0, v1, 16, v0 -; GFX9-GISEL-NEXT: v_pk_fma_f16 v0, v2, v0, v3 +; GFX9-GISEL-NEXT: v_pk_add_f16 v0, v0, v3 ; GFX9-GISEL-NEXT: s_setpc_b64 s[30:31] ; ; GFX10-SDAG-LABEL: v_contract_mul_add_v2f16_select_64_1: @@ -6188,14 +5997,15 @@ define <2 x half> @v_contract_mul_add_v2f16_select_64_1(<2 x i32> %arg, <2 x hal ; GFX10-GISEL-LABEL: v_contract_mul_add_v2f16_select_64_1: ; GFX10-GISEL: ; %bb.0: ; GFX10-GISEL-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX10-GISEL-NEXT: v_mov_b32_e32 v4, 0x3c00 ; GFX10-GISEL-NEXT: v_cmp_eq_u32_e32 vcc_lo, 0, v0 -; GFX10-GISEL-NEXT: v_cndmask_b32_e64 v0, v4, 0x5400, vcc_lo +; GFX10-GISEL-NEXT: v_cndmask_b32_e64 v0, 0, 6, vcc_lo ; GFX10-GISEL-NEXT: v_cmp_eq_u32_e32 vcc_lo, 0, v1 +; GFX10-GISEL-NEXT: v_ldexp_f16_e32 v0, v2, v0 +; GFX10-GISEL-NEXT: v_cndmask_b32_e64 v1, 0, 6, vcc_lo ; GFX10-GISEL-NEXT: v_and_b32_e32 v0, 0xffff, v0 -; GFX10-GISEL-NEXT: v_cndmask_b32_e64 v1, v4, 0x5400, vcc_lo +; GFX10-GISEL-NEXT: v_ldexp_f16_sdwa v1, v2, v1 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:WORD_1 src1_sel:DWORD ; GFX10-GISEL-NEXT: v_lshl_or_b32 v0, v1, 16, v0 -; GFX10-GISEL-NEXT: v_pk_fma_f16 v0, v2, v0, v3 +; GFX10-GISEL-NEXT: v_pk_add_f16 v0, v0, v3 ; GFX10-GISEL-NEXT: s_setpc_b64 s[30:31] ; ; GFX11-SDAG-LABEL: 
v_contract_mul_add_v2f16_select_64_1: @@ -6213,14 +6023,16 @@ define <2 x half> @v_contract_mul_add_v2f16_select_64_1(<2 x i32> %arg, <2 x hal ; GFX11-GISEL-LABEL: v_contract_mul_add_v2f16_select_64_1: ; GFX11-GISEL: ; %bb.0: ; GFX11-GISEL-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX11-GISEL-NEXT: v_mov_b32_e32 v4, 0x3c00 ; GFX11-GISEL-NEXT: v_cmp_eq_u32_e32 vcc_lo, 0, v0 -; GFX11-GISEL-NEXT: v_cndmask_b32_e64 v0, v4, 0x5400, vcc_lo +; GFX11-GISEL-NEXT: v_lshrrev_b32_e32 v4, 16, v2 +; GFX11-GISEL-NEXT: v_cndmask_b32_e64 v0, 0, 6, vcc_lo ; GFX11-GISEL-NEXT: v_cmp_eq_u32_e32 vcc_lo, 0, v1 +; GFX11-GISEL-NEXT: v_ldexp_f16_e32 v0, v2, v0 +; GFX11-GISEL-NEXT: v_cndmask_b32_e64 v1, 0, 6, vcc_lo ; GFX11-GISEL-NEXT: v_and_b32_e32 v0, 0xffff, v0 -; GFX11-GISEL-NEXT: v_cndmask_b32_e64 v1, v4, 0x5400, vcc_lo +; GFX11-GISEL-NEXT: v_ldexp_f16_e32 v1, v4, v1 ; GFX11-GISEL-NEXT: v_lshl_or_b32 v0, v1, 16, v0 -; GFX11-GISEL-NEXT: v_pk_fma_f16 v0, v2, v0, v3 +; GFX11-GISEL-NEXT: v_pk_add_f16 v0, v0, v3 ; GFX11-GISEL-NEXT: s_setpc_b64 s[30:31] %cond = icmp eq <2 x i32> %arg, zeroinitializer %select.pow2 = select <2 x i1> %cond, <2 x half> , <2 x half> @@ -6246,15 +6058,14 @@ define <2 x half> @v_contract_mul_add_v2f16_select_1_64(<2 x i32> %arg, <2 x hal ; GFX9-GISEL-LABEL: v_contract_mul_add_v2f16_select_1_64: ; GFX9-GISEL: ; %bb.0: ; GFX9-GISEL-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX9-GISEL-NEXT: v_mov_b32_e32 v4, 0x3c00 -; GFX9-GISEL-NEXT: v_mov_b32_e32 v5, 0x5400 ; GFX9-GISEL-NEXT: v_cmp_eq_u32_e32 vcc, 0, v0 -; GFX9-GISEL-NEXT: v_cndmask_b32_e32 v0, v5, v4, vcc +; GFX9-GISEL-NEXT: v_cndmask_b32_e64 v0, 6, 0, vcc ; GFX9-GISEL-NEXT: v_cmp_eq_u32_e32 vcc, 0, v1 -; GFX9-GISEL-NEXT: v_cndmask_b32_e32 v1, v5, v4, vcc -; GFX9-GISEL-NEXT: v_and_b32_e32 v0, 0xffff, v0 +; GFX9-GISEL-NEXT: v_cndmask_b32_e64 v1, 6, 0, vcc +; GFX9-GISEL-NEXT: v_ldexp_f16_e32 v0, v2, v0 +; GFX9-GISEL-NEXT: v_ldexp_f16_sdwa v1, v2, v1 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:WORD_1 
src1_sel:DWORD ; GFX9-GISEL-NEXT: v_lshl_or_b32 v0, v1, 16, v0 -; GFX9-GISEL-NEXT: v_pk_fma_f16 v0, v2, v0, v3 +; GFX9-GISEL-NEXT: v_pk_add_f16 v0, v0, v3 ; GFX9-GISEL-NEXT: s_setpc_b64 s[30:31] ; ; GFX10-SDAG-LABEL: v_contract_mul_add_v2f16_select_1_64: @@ -6272,14 +6083,15 @@ define <2 x half> @v_contract_mul_add_v2f16_select_1_64(<2 x i32> %arg, <2 x hal ; GFX10-GISEL-LABEL: v_contract_mul_add_v2f16_select_1_64: ; GFX10-GISEL: ; %bb.0: ; GFX10-GISEL-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX10-GISEL-NEXT: v_mov_b32_e32 v4, 0x5400 ; GFX10-GISEL-NEXT: v_cmp_eq_u32_e32 vcc_lo, 0, v0 -; GFX10-GISEL-NEXT: v_cndmask_b32_e64 v0, v4, 0x3c00, vcc_lo +; GFX10-GISEL-NEXT: v_cndmask_b32_e64 v0, 6, 0, vcc_lo ; GFX10-GISEL-NEXT: v_cmp_eq_u32_e32 vcc_lo, 0, v1 +; GFX10-GISEL-NEXT: v_ldexp_f16_e32 v0, v2, v0 +; GFX10-GISEL-NEXT: v_cndmask_b32_e64 v1, 6, 0, vcc_lo ; GFX10-GISEL-NEXT: v_and_b32_e32 v0, 0xffff, v0 -; GFX10-GISEL-NEXT: v_cndmask_b32_e64 v1, v4, 0x3c00, vcc_lo +; GFX10-GISEL-NEXT: v_ldexp_f16_sdwa v1, v2, v1 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:WORD_1 src1_sel:DWORD ; GFX10-GISEL-NEXT: v_lshl_or_b32 v0, v1, 16, v0 -; GFX10-GISEL-NEXT: v_pk_fma_f16 v0, v2, v0, v3 +; GFX10-GISEL-NEXT: v_pk_add_f16 v0, v0, v3 ; GFX10-GISEL-NEXT: s_setpc_b64 s[30:31] ; ; GFX11-SDAG-LABEL: v_contract_mul_add_v2f16_select_1_64: @@ -6297,14 +6109,16 @@ define <2 x half> @v_contract_mul_add_v2f16_select_1_64(<2 x i32> %arg, <2 x hal ; GFX11-GISEL-LABEL: v_contract_mul_add_v2f16_select_1_64: ; GFX11-GISEL: ; %bb.0: ; GFX11-GISEL-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX11-GISEL-NEXT: v_mov_b32_e32 v4, 0x5400 ; GFX11-GISEL-NEXT: v_cmp_eq_u32_e32 vcc_lo, 0, v0 -; GFX11-GISEL-NEXT: v_cndmask_b32_e64 v0, v4, 0x3c00, vcc_lo +; GFX11-GISEL-NEXT: v_lshrrev_b32_e32 v4, 16, v2 +; GFX11-GISEL-NEXT: v_cndmask_b32_e64 v0, 6, 0, vcc_lo ; GFX11-GISEL-NEXT: v_cmp_eq_u32_e32 vcc_lo, 0, v1 +; GFX11-GISEL-NEXT: v_ldexp_f16_e32 v0, v2, v0 +; GFX11-GISEL-NEXT: v_cndmask_b32_e64 v1, 
6, 0, vcc_lo ; GFX11-GISEL-NEXT: v_and_b32_e32 v0, 0xffff, v0 -; GFX11-GISEL-NEXT: v_cndmask_b32_e64 v1, v4, 0x3c00, vcc_lo +; GFX11-GISEL-NEXT: v_ldexp_f16_e32 v1, v4, v1 ; GFX11-GISEL-NEXT: v_lshl_or_b32 v0, v1, 16, v0 -; GFX11-GISEL-NEXT: v_pk_fma_f16 v0, v2, v0, v3 +; GFX11-GISEL-NEXT: v_pk_add_f16 v0, v0, v3 ; GFX11-GISEL-NEXT: s_setpc_b64 s[30:31] %cond = icmp eq <2 x i32> %arg, zeroinitializer %select.pow2 = select <2 x i1> %cond, <2 x half> , <2 x half> @@ -6330,15 +6144,15 @@ define <2 x half> @v_contract_mul_add_v2f16_select_n64_n1(<2 x i32> %arg, <2 x h ; GFX9-GISEL-LABEL: v_contract_mul_add_v2f16_select_n64_n1: ; GFX9-GISEL: ; %bb.0: ; GFX9-GISEL-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX9-GISEL-NEXT: v_mov_b32_e32 v4, 0xd400 -; GFX9-GISEL-NEXT: v_mov_b32_e32 v5, 0xbc00 ; GFX9-GISEL-NEXT: v_cmp_eq_u32_e32 vcc, 0, v0 -; GFX9-GISEL-NEXT: v_cndmask_b32_e32 v0, v5, v4, vcc +; GFX9-GISEL-NEXT: v_cndmask_b32_e64 v0, 0, 6, vcc ; GFX9-GISEL-NEXT: v_cmp_eq_u32_e32 vcc, 0, v1 -; GFX9-GISEL-NEXT: v_cndmask_b32_e32 v1, v5, v4, vcc -; GFX9-GISEL-NEXT: v_and_b32_e32 v0, 0xffff, v0 +; GFX9-GISEL-NEXT: v_xor_b32_e32 v2, 0x80008000, v2 +; GFX9-GISEL-NEXT: v_cndmask_b32_e64 v1, 0, 6, vcc +; GFX9-GISEL-NEXT: v_ldexp_f16_e32 v0, v2, v0 +; GFX9-GISEL-NEXT: v_ldexp_f16_sdwa v1, v2, v1 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:WORD_1 src1_sel:DWORD ; GFX9-GISEL-NEXT: v_lshl_or_b32 v0, v1, 16, v0 -; GFX9-GISEL-NEXT: v_pk_fma_f16 v0, v2, v0, v3 +; GFX9-GISEL-NEXT: v_pk_add_f16 v0, v0, v3 ; GFX9-GISEL-NEXT: s_setpc_b64 s[30:31] ; ; GFX10-SDAG-LABEL: v_contract_mul_add_v2f16_select_n64_n1: @@ -6356,14 +6170,16 @@ define <2 x half> @v_contract_mul_add_v2f16_select_n64_n1(<2 x i32> %arg, <2 x h ; GFX10-GISEL-LABEL: v_contract_mul_add_v2f16_select_n64_n1: ; GFX10-GISEL: ; %bb.0: ; GFX10-GISEL-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX10-GISEL-NEXT: v_mov_b32_e32 v4, 0xbc00 ; GFX10-GISEL-NEXT: v_cmp_eq_u32_e32 vcc_lo, 0, v0 -; GFX10-GISEL-NEXT: 
v_cndmask_b32_e64 v0, v4, 0xd400, vcc_lo +; GFX10-GISEL-NEXT: v_xor_b32_e32 v2, 0x80008000, v2 +; GFX10-GISEL-NEXT: v_cndmask_b32_e64 v0, 0, 6, vcc_lo ; GFX10-GISEL-NEXT: v_cmp_eq_u32_e32 vcc_lo, 0, v1 +; GFX10-GISEL-NEXT: v_ldexp_f16_e32 v0, v2, v0 +; GFX10-GISEL-NEXT: v_cndmask_b32_e64 v1, 0, 6, vcc_lo ; GFX10-GISEL-NEXT: v_and_b32_e32 v0, 0xffff, v0 -; GFX10-GISEL-NEXT: v_cndmask_b32_e64 v1, v4, 0xd400, vcc_lo +; GFX10-GISEL-NEXT: v_ldexp_f16_sdwa v1, v2, v1 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:WORD_1 src1_sel:DWORD ; GFX10-GISEL-NEXT: v_lshl_or_b32 v0, v1, 16, v0 -; GFX10-GISEL-NEXT: v_pk_fma_f16 v0, v2, v0, v3 +; GFX10-GISEL-NEXT: v_pk_add_f16 v0, v0, v3 ; GFX10-GISEL-NEXT: s_setpc_b64 s[30:31] ; ; GFX11-SDAG-LABEL: v_contract_mul_add_v2f16_select_n64_n1: @@ -6381,14 +6197,17 @@ define <2 x half> @v_contract_mul_add_v2f16_select_n64_n1(<2 x i32> %arg, <2 x h ; GFX11-GISEL-LABEL: v_contract_mul_add_v2f16_select_n64_n1: ; GFX11-GISEL: ; %bb.0: ; GFX11-GISEL-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX11-GISEL-NEXT: v_mov_b32_e32 v4, 0xbc00 ; GFX11-GISEL-NEXT: v_cmp_eq_u32_e32 vcc_lo, 0, v0 -; GFX11-GISEL-NEXT: v_cndmask_b32_e64 v0, v4, 0xd400, vcc_lo +; GFX11-GISEL-NEXT: v_xor_b32_e32 v2, 0x80008000, v2 +; GFX11-GISEL-NEXT: v_cndmask_b32_e64 v0, 0, 6, vcc_lo ; GFX11-GISEL-NEXT: v_cmp_eq_u32_e32 vcc_lo, 0, v1 +; GFX11-GISEL-NEXT: v_lshrrev_b32_e32 v4, 16, v2 +; GFX11-GISEL-NEXT: v_ldexp_f16_e32 v0, v2, v0 +; GFX11-GISEL-NEXT: v_cndmask_b32_e64 v1, 0, 6, vcc_lo ; GFX11-GISEL-NEXT: v_and_b32_e32 v0, 0xffff, v0 -; GFX11-GISEL-NEXT: v_cndmask_b32_e64 v1, v4, 0xd400, vcc_lo +; GFX11-GISEL-NEXT: v_ldexp_f16_e32 v1, v4, v1 ; GFX11-GISEL-NEXT: v_lshl_or_b32 v0, v1, 16, v0 -; GFX11-GISEL-NEXT: v_pk_fma_f16 v0, v2, v0, v3 +; GFX11-GISEL-NEXT: v_pk_add_f16 v0, v0, v3 ; GFX11-GISEL-NEXT: s_setpc_b64 s[30:31] %cond = icmp eq <2 x i32> %arg, zeroinitializer %select.pow2 = select <2 x i1> %cond, <2 x half> , <2 x half> @@ -6414,15 +6233,15 @@ define <2 x half> 
@v_contract_mul_add_v2f16_select_n1_n64(<2 x i32> %arg, <2 x h ; GFX9-GISEL-LABEL: v_contract_mul_add_v2f16_select_n1_n64: ; GFX9-GISEL: ; %bb.0: ; GFX9-GISEL-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX9-GISEL-NEXT: v_mov_b32_e32 v4, 0xbc00 -; GFX9-GISEL-NEXT: v_mov_b32_e32 v5, 0xd400 ; GFX9-GISEL-NEXT: v_cmp_eq_u32_e32 vcc, 0, v0 -; GFX9-GISEL-NEXT: v_cndmask_b32_e32 v0, v5, v4, vcc +; GFX9-GISEL-NEXT: v_cndmask_b32_e64 v0, 6, 0, vcc ; GFX9-GISEL-NEXT: v_cmp_eq_u32_e32 vcc, 0, v1 -; GFX9-GISEL-NEXT: v_cndmask_b32_e32 v1, v5, v4, vcc -; GFX9-GISEL-NEXT: v_and_b32_e32 v0, 0xffff, v0 +; GFX9-GISEL-NEXT: v_xor_b32_e32 v2, 0x80008000, v2 +; GFX9-GISEL-NEXT: v_cndmask_b32_e64 v1, 6, 0, vcc +; GFX9-GISEL-NEXT: v_ldexp_f16_e32 v0, v2, v0 +; GFX9-GISEL-NEXT: v_ldexp_f16_sdwa v1, v2, v1 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:WORD_1 src1_sel:DWORD ; GFX9-GISEL-NEXT: v_lshl_or_b32 v0, v1, 16, v0 -; GFX9-GISEL-NEXT: v_pk_fma_f16 v0, v2, v0, v3 +; GFX9-GISEL-NEXT: v_pk_add_f16 v0, v0, v3 ; GFX9-GISEL-NEXT: s_setpc_b64 s[30:31] ; ; GFX10-SDAG-LABEL: v_contract_mul_add_v2f16_select_n1_n64: @@ -6440,14 +6259,16 @@ define <2 x half> @v_contract_mul_add_v2f16_select_n1_n64(<2 x i32> %arg, <2 x h ; GFX10-GISEL-LABEL: v_contract_mul_add_v2f16_select_n1_n64: ; GFX10-GISEL: ; %bb.0: ; GFX10-GISEL-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX10-GISEL-NEXT: v_mov_b32_e32 v4, 0xd400 ; GFX10-GISEL-NEXT: v_cmp_eq_u32_e32 vcc_lo, 0, v0 -; GFX10-GISEL-NEXT: v_cndmask_b32_e64 v0, v4, 0xbc00, vcc_lo +; GFX10-GISEL-NEXT: v_xor_b32_e32 v2, 0x80008000, v2 +; GFX10-GISEL-NEXT: v_cndmask_b32_e64 v0, 6, 0, vcc_lo ; GFX10-GISEL-NEXT: v_cmp_eq_u32_e32 vcc_lo, 0, v1 +; GFX10-GISEL-NEXT: v_ldexp_f16_e32 v0, v2, v0 +; GFX10-GISEL-NEXT: v_cndmask_b32_e64 v1, 6, 0, vcc_lo ; GFX10-GISEL-NEXT: v_and_b32_e32 v0, 0xffff, v0 -; GFX10-GISEL-NEXT: v_cndmask_b32_e64 v1, v4, 0xbc00, vcc_lo +; GFX10-GISEL-NEXT: v_ldexp_f16_sdwa v1, v2, v1 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:WORD_1 
src1_sel:DWORD ; GFX10-GISEL-NEXT: v_lshl_or_b32 v0, v1, 16, v0 -; GFX10-GISEL-NEXT: v_pk_fma_f16 v0, v2, v0, v3 +; GFX10-GISEL-NEXT: v_pk_add_f16 v0, v0, v3 ; GFX10-GISEL-NEXT: s_setpc_b64 s[30:31] ; ; GFX11-SDAG-LABEL: v_contract_mul_add_v2f16_select_n1_n64: @@ -6465,14 +6286,17 @@ define <2 x half> @v_contract_mul_add_v2f16_select_n1_n64(<2 x i32> %arg, <2 x h ; GFX11-GISEL-LABEL: v_contract_mul_add_v2f16_select_n1_n64: ; GFX11-GISEL: ; %bb.0: ; GFX11-GISEL-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX11-GISEL-NEXT: v_mov_b32_e32 v4, 0xd400 ; GFX11-GISEL-NEXT: v_cmp_eq_u32_e32 vcc_lo, 0, v0 -; GFX11-GISEL-NEXT: v_cndmask_b32_e64 v0, v4, 0xbc00, vcc_lo +; GFX11-GISEL-NEXT: v_xor_b32_e32 v2, 0x80008000, v2 +; GFX11-GISEL-NEXT: v_cndmask_b32_e64 v0, 6, 0, vcc_lo ; GFX11-GISEL-NEXT: v_cmp_eq_u32_e32 vcc_lo, 0, v1 +; GFX11-GISEL-NEXT: v_lshrrev_b32_e32 v4, 16, v2 +; GFX11-GISEL-NEXT: v_ldexp_f16_e32 v0, v2, v0 +; GFX11-GISEL-NEXT: v_cndmask_b32_e64 v1, 6, 0, vcc_lo ; GFX11-GISEL-NEXT: v_and_b32_e32 v0, 0xffff, v0 -; GFX11-GISEL-NEXT: v_cndmask_b32_e64 v1, v4, 0xbc00, vcc_lo +; GFX11-GISEL-NEXT: v_ldexp_f16_e32 v1, v4, v1 ; GFX11-GISEL-NEXT: v_lshl_or_b32 v0, v1, 16, v0 -; GFX11-GISEL-NEXT: v_pk_fma_f16 v0, v2, v0, v3 +; GFX11-GISEL-NEXT: v_pk_add_f16 v0, v0, v3 ; GFX11-GISEL-NEXT: s_setpc_b64 s[30:31] %cond = icmp eq <2 x i32> %arg, zeroinitializer %select.pow2 = select <2 x i1> %cond, <2 x half> , <2 x half> @@ -6498,15 +6322,20 @@ define <2 x half> @v_contract_mul_add_v2f16_select_128_64(<2 x i32> %arg, <2 x h ; GFX9-GISEL-LABEL: v_contract_mul_add_v2f16_select_128_64: ; GFX9-GISEL: ; %bb.0: ; GFX9-GISEL-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX9-GISEL-NEXT: v_mov_b32_e32 v4, 0x5800 -; GFX9-GISEL-NEXT: v_mov_b32_e32 v5, 0x5400 ; GFX9-GISEL-NEXT: v_cmp_eq_u32_e32 vcc, 0, v0 -; GFX9-GISEL-NEXT: v_cndmask_b32_e32 v0, v5, v4, vcc +; GFX9-GISEL-NEXT: v_cndmask_b32_e64 v0, 0, 1, vcc ; GFX9-GISEL-NEXT: v_cmp_eq_u32_e32 vcc, 0, v1 -; GFX9-GISEL-NEXT: 
v_cndmask_b32_e32 v1, v5, v4, vcc -; GFX9-GISEL-NEXT: v_and_b32_e32 v0, 0xffff, v0 +; GFX9-GISEL-NEXT: v_cndmask_b32_e64 v1, 0, 1, vcc +; GFX9-GISEL-NEXT: v_add_u32_e32 v0, 6, v0 +; GFX9-GISEL-NEXT: v_add_u32_e32 v1, 6, v1 +; GFX9-GISEL-NEXT: v_mov_b32_e32 v4, 0xffff8000 +; GFX9-GISEL-NEXT: v_mov_b32_e32 v5, 0x7fff +; GFX9-GISEL-NEXT: v_med3_i32 v0, v0, v4, v5 +; GFX9-GISEL-NEXT: v_med3_i32 v1, v1, v4, v5 +; GFX9-GISEL-NEXT: v_ldexp_f16_e32 v0, v2, v0 +; GFX9-GISEL-NEXT: v_ldexp_f16_sdwa v1, v2, v1 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:WORD_1 src1_sel:DWORD ; GFX9-GISEL-NEXT: v_lshl_or_b32 v0, v1, 16, v0 -; GFX9-GISEL-NEXT: v_pk_fma_f16 v0, v2, v0, v3 +; GFX9-GISEL-NEXT: v_pk_add_f16 v0, v0, v3 ; GFX9-GISEL-NEXT: s_setpc_b64 s[30:31] ; ; GFX10-SDAG-LABEL: v_contract_mul_add_v2f16_select_128_64: @@ -6524,14 +6353,20 @@ define <2 x half> @v_contract_mul_add_v2f16_select_128_64(<2 x i32> %arg, <2 x h ; GFX10-GISEL-LABEL: v_contract_mul_add_v2f16_select_128_64: ; GFX10-GISEL: ; %bb.0: ; GFX10-GISEL-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX10-GISEL-NEXT: v_mov_b32_e32 v4, 0x5400 ; GFX10-GISEL-NEXT: v_cmp_eq_u32_e32 vcc_lo, 0, v0 -; GFX10-GISEL-NEXT: v_cndmask_b32_e64 v0, v4, 0x5800, vcc_lo +; GFX10-GISEL-NEXT: v_mov_b32_e32 v4, 0x7fff +; GFX10-GISEL-NEXT: v_cndmask_b32_e64 v0, 0, 1, vcc_lo ; GFX10-GISEL-NEXT: v_cmp_eq_u32_e32 vcc_lo, 0, v1 +; GFX10-GISEL-NEXT: v_add_nc_u32_e32 v0, 6, v0 +; GFX10-GISEL-NEXT: v_cndmask_b32_e64 v1, 0, 1, vcc_lo +; GFX10-GISEL-NEXT: v_med3_i32 v0, 0xffff8000, v0, v4 +; GFX10-GISEL-NEXT: v_add_nc_u32_e32 v1, 6, v1 +; GFX10-GISEL-NEXT: v_ldexp_f16_e32 v0, v2, v0 +; GFX10-GISEL-NEXT: v_med3_i32 v1, 0xffff8000, v1, v4 ; GFX10-GISEL-NEXT: v_and_b32_e32 v0, 0xffff, v0 -; GFX10-GISEL-NEXT: v_cndmask_b32_e64 v1, v4, 0x5800, vcc_lo +; GFX10-GISEL-NEXT: v_ldexp_f16_sdwa v1, v2, v1 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:WORD_1 src1_sel:DWORD ; GFX10-GISEL-NEXT: v_lshl_or_b32 v0, v1, 16, v0 -; GFX10-GISEL-NEXT: v_pk_fma_f16 v0, 
v2, v0, v3 +; GFX10-GISEL-NEXT: v_pk_add_f16 v0, v0, v3 ; GFX10-GISEL-NEXT: s_setpc_b64 s[30:31] ; ; GFX11-SDAG-LABEL: v_contract_mul_add_v2f16_select_128_64: @@ -6549,14 +6384,20 @@ define <2 x half> @v_contract_mul_add_v2f16_select_128_64(<2 x i32> %arg, <2 x h ; GFX11-GISEL-LABEL: v_contract_mul_add_v2f16_select_128_64: ; GFX11-GISEL: ; %bb.0: ; GFX11-GISEL-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX11-GISEL-NEXT: v_mov_b32_e32 v4, 0x5400 ; GFX11-GISEL-NEXT: v_cmp_eq_u32_e32 vcc_lo, 0, v0 -; GFX11-GISEL-NEXT: v_cndmask_b32_e64 v0, v4, 0x5800, vcc_lo +; GFX11-GISEL-NEXT: v_lshrrev_b32_e32 v5, 16, v2 +; GFX11-GISEL-NEXT: v_cndmask_b32_e64 v0, 0, 1, vcc_lo ; GFX11-GISEL-NEXT: v_cmp_eq_u32_e32 vcc_lo, 0, v1 +; GFX11-GISEL-NEXT: v_add_nc_u32_e32 v0, 6, v0 +; GFX11-GISEL-NEXT: v_cndmask_b32_e64 v1, 0, 1, vcc_lo +; GFX11-GISEL-NEXT: v_dual_mov_b32 v4, 0x7fff :: v_dual_add_nc_u32 v1, 6, v1 +; GFX11-GISEL-NEXT: v_med3_i32 v0, 0xffff8000, v0, v4 +; GFX11-GISEL-NEXT: v_med3_i32 v1, 0xffff8000, v1, v4 +; GFX11-GISEL-NEXT: v_ldexp_f16_e32 v0, v2, v0 +; GFX11-GISEL-NEXT: v_ldexp_f16_e32 v1, v5, v1 ; GFX11-GISEL-NEXT: v_and_b32_e32 v0, 0xffff, v0 -; GFX11-GISEL-NEXT: v_cndmask_b32_e64 v1, v4, 0x5800, vcc_lo ; GFX11-GISEL-NEXT: v_lshl_or_b32 v0, v1, 16, v0 -; GFX11-GISEL-NEXT: v_pk_fma_f16 v0, v2, v0, v3 +; GFX11-GISEL-NEXT: v_pk_add_f16 v0, v0, v3 ; GFX11-GISEL-NEXT: s_setpc_b64 s[30:31] %cond = icmp eq <2 x i32> %arg, zeroinitializer %select.pow2 = select <2 x i1> %cond, <2 x half> , <2 x half> @@ -6582,15 +6423,14 @@ define <2 x half> @v_contract_mul_add_v2f16_select_128_4(<2 x i32> %arg, <2 x ha ; GFX9-GISEL-LABEL: v_contract_mul_add_v2f16_select_128_4: ; GFX9-GISEL: ; %bb.0: ; GFX9-GISEL-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX9-GISEL-NEXT: v_mov_b32_e32 v4, 0x5800 -; GFX9-GISEL-NEXT: v_mov_b32_e32 v5, 0x4400 ; GFX9-GISEL-NEXT: v_cmp_eq_u32_e32 vcc, 0, v0 -; GFX9-GISEL-NEXT: v_cndmask_b32_e32 v0, v5, v4, vcc +; GFX9-GISEL-NEXT: v_cndmask_b32_e64 v0, 2, 
7, vcc ; GFX9-GISEL-NEXT: v_cmp_eq_u32_e32 vcc, 0, v1 -; GFX9-GISEL-NEXT: v_cndmask_b32_e32 v1, v5, v4, vcc -; GFX9-GISEL-NEXT: v_and_b32_e32 v0, 0xffff, v0 +; GFX9-GISEL-NEXT: v_cndmask_b32_e64 v1, 2, 7, vcc +; GFX9-GISEL-NEXT: v_ldexp_f16_e32 v0, v2, v0 +; GFX9-GISEL-NEXT: v_ldexp_f16_sdwa v1, v2, v1 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:WORD_1 src1_sel:DWORD ; GFX9-GISEL-NEXT: v_lshl_or_b32 v0, v1, 16, v0 -; GFX9-GISEL-NEXT: v_pk_fma_f16 v0, v2, v0, v3 +; GFX9-GISEL-NEXT: v_pk_add_f16 v0, v0, v3 ; GFX9-GISEL-NEXT: s_setpc_b64 s[30:31] ; ; GFX10-SDAG-LABEL: v_contract_mul_add_v2f16_select_128_4: @@ -6608,14 +6448,15 @@ define <2 x half> @v_contract_mul_add_v2f16_select_128_4(<2 x i32> %arg, <2 x ha ; GFX10-GISEL-LABEL: v_contract_mul_add_v2f16_select_128_4: ; GFX10-GISEL: ; %bb.0: ; GFX10-GISEL-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX10-GISEL-NEXT: v_mov_b32_e32 v4, 0x4400 ; GFX10-GISEL-NEXT: v_cmp_eq_u32_e32 vcc_lo, 0, v0 -; GFX10-GISEL-NEXT: v_cndmask_b32_e64 v0, v4, 0x5800, vcc_lo +; GFX10-GISEL-NEXT: v_cndmask_b32_e64 v0, 2, 7, vcc_lo ; GFX10-GISEL-NEXT: v_cmp_eq_u32_e32 vcc_lo, 0, v1 +; GFX10-GISEL-NEXT: v_ldexp_f16_e32 v0, v2, v0 +; GFX10-GISEL-NEXT: v_cndmask_b32_e64 v1, 2, 7, vcc_lo ; GFX10-GISEL-NEXT: v_and_b32_e32 v0, 0xffff, v0 -; GFX10-GISEL-NEXT: v_cndmask_b32_e64 v1, v4, 0x5800, vcc_lo +; GFX10-GISEL-NEXT: v_ldexp_f16_sdwa v1, v2, v1 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:WORD_1 src1_sel:DWORD ; GFX10-GISEL-NEXT: v_lshl_or_b32 v0, v1, 16, v0 -; GFX10-GISEL-NEXT: v_pk_fma_f16 v0, v2, v0, v3 +; GFX10-GISEL-NEXT: v_pk_add_f16 v0, v0, v3 ; GFX10-GISEL-NEXT: s_setpc_b64 s[30:31] ; ; GFX11-SDAG-LABEL: v_contract_mul_add_v2f16_select_128_4: @@ -6633,14 +6474,16 @@ define <2 x half> @v_contract_mul_add_v2f16_select_128_4(<2 x i32> %arg, <2 x ha ; GFX11-GISEL-LABEL: v_contract_mul_add_v2f16_select_128_4: ; GFX11-GISEL: ; %bb.0: ; GFX11-GISEL-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX11-GISEL-NEXT: v_mov_b32_e32 v4, 0x4400 ; 
GFX11-GISEL-NEXT: v_cmp_eq_u32_e32 vcc_lo, 0, v0 -; GFX11-GISEL-NEXT: v_cndmask_b32_e64 v0, v4, 0x5800, vcc_lo +; GFX11-GISEL-NEXT: v_lshrrev_b32_e32 v4, 16, v2 +; GFX11-GISEL-NEXT: v_cndmask_b32_e64 v0, 2, 7, vcc_lo ; GFX11-GISEL-NEXT: v_cmp_eq_u32_e32 vcc_lo, 0, v1 +; GFX11-GISEL-NEXT: v_ldexp_f16_e32 v0, v2, v0 +; GFX11-GISEL-NEXT: v_cndmask_b32_e64 v1, 2, 7, vcc_lo ; GFX11-GISEL-NEXT: v_and_b32_e32 v0, 0xffff, v0 -; GFX11-GISEL-NEXT: v_cndmask_b32_e64 v1, v4, 0x5800, vcc_lo +; GFX11-GISEL-NEXT: v_ldexp_f16_e32 v1, v4, v1 ; GFX11-GISEL-NEXT: v_lshl_or_b32 v0, v1, 16, v0 -; GFX11-GISEL-NEXT: v_pk_fma_f16 v0, v2, v0, v3 +; GFX11-GISEL-NEXT: v_pk_add_f16 v0, v0, v3 ; GFX11-GISEL-NEXT: s_setpc_b64 s[30:31] %cond = icmp eq <2 x i32> %arg, zeroinitializer %select.pow2 = select <2 x i1> %cond, <2 x half> , <2 x half> @@ -6666,15 +6509,20 @@ define <2 x half> @v_contract_mul_add_v2f16_select_2_4(<2 x i32> %arg, <2 x half ; GFX9-GISEL-LABEL: v_contract_mul_add_v2f16_select_2_4: ; GFX9-GISEL: ; %bb.0: ; GFX9-GISEL-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX9-GISEL-NEXT: v_mov_b32_e32 v4, 0x4000 -; GFX9-GISEL-NEXT: v_mov_b32_e32 v5, 0x4400 ; GFX9-GISEL-NEXT: v_cmp_eq_u32_e32 vcc, 0, v0 -; GFX9-GISEL-NEXT: v_cndmask_b32_e32 v0, v5, v4, vcc +; GFX9-GISEL-NEXT: v_cndmask_b32_e64 v0, 0, -1, vcc ; GFX9-GISEL-NEXT: v_cmp_eq_u32_e32 vcc, 0, v1 -; GFX9-GISEL-NEXT: v_cndmask_b32_e32 v1, v5, v4, vcc -; GFX9-GISEL-NEXT: v_and_b32_e32 v0, 0xffff, v0 +; GFX9-GISEL-NEXT: v_cndmask_b32_e64 v1, 0, -1, vcc +; GFX9-GISEL-NEXT: v_add_u32_e32 v0, 2, v0 +; GFX9-GISEL-NEXT: v_add_u32_e32 v1, 2, v1 +; GFX9-GISEL-NEXT: v_mov_b32_e32 v4, 0xffff8000 +; GFX9-GISEL-NEXT: v_mov_b32_e32 v5, 0x7fff +; GFX9-GISEL-NEXT: v_med3_i32 v0, v0, v4, v5 +; GFX9-GISEL-NEXT: v_med3_i32 v1, v1, v4, v5 +; GFX9-GISEL-NEXT: v_ldexp_f16_e32 v0, v2, v0 +; GFX9-GISEL-NEXT: v_ldexp_f16_sdwa v1, v2, v1 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:WORD_1 src1_sel:DWORD ; GFX9-GISEL-NEXT: v_lshl_or_b32 v0, v1, 16, v0 
-; GFX9-GISEL-NEXT: v_pk_fma_f16 v0, v2, v0, v3 +; GFX9-GISEL-NEXT: v_pk_add_f16 v0, v0, v3 ; GFX9-GISEL-NEXT: s_setpc_b64 s[30:31] ; ; GFX10-SDAG-LABEL: v_contract_mul_add_v2f16_select_2_4: @@ -6692,14 +6540,20 @@ define <2 x half> @v_contract_mul_add_v2f16_select_2_4(<2 x i32> %arg, <2 x half ; GFX10-GISEL-LABEL: v_contract_mul_add_v2f16_select_2_4: ; GFX10-GISEL: ; %bb.0: ; GFX10-GISEL-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX10-GISEL-NEXT: v_mov_b32_e32 v4, 0x4400 ; GFX10-GISEL-NEXT: v_cmp_eq_u32_e32 vcc_lo, 0, v0 -; GFX10-GISEL-NEXT: v_cndmask_b32_e64 v0, v4, 0x4000, vcc_lo +; GFX10-GISEL-NEXT: v_mov_b32_e32 v4, 0x7fff +; GFX10-GISEL-NEXT: v_cndmask_b32_e64 v0, 0, -1, vcc_lo ; GFX10-GISEL-NEXT: v_cmp_eq_u32_e32 vcc_lo, 0, v1 +; GFX10-GISEL-NEXT: v_add_nc_u32_e32 v0, 2, v0 +; GFX10-GISEL-NEXT: v_cndmask_b32_e64 v1, 0, -1, vcc_lo +; GFX10-GISEL-NEXT: v_med3_i32 v0, 0xffff8000, v0, v4 +; GFX10-GISEL-NEXT: v_add_nc_u32_e32 v1, 2, v1 +; GFX10-GISEL-NEXT: v_ldexp_f16_e32 v0, v2, v0 +; GFX10-GISEL-NEXT: v_med3_i32 v1, 0xffff8000, v1, v4 ; GFX10-GISEL-NEXT: v_and_b32_e32 v0, 0xffff, v0 -; GFX10-GISEL-NEXT: v_cndmask_b32_e64 v1, v4, 0x4000, vcc_lo +; GFX10-GISEL-NEXT: v_ldexp_f16_sdwa v1, v2, v1 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:WORD_1 src1_sel:DWORD ; GFX10-GISEL-NEXT: v_lshl_or_b32 v0, v1, 16, v0 -; GFX10-GISEL-NEXT: v_pk_fma_f16 v0, v2, v0, v3 +; GFX10-GISEL-NEXT: v_pk_add_f16 v0, v0, v3 ; GFX10-GISEL-NEXT: s_setpc_b64 s[30:31] ; ; GFX11-SDAG-LABEL: v_contract_mul_add_v2f16_select_2_4: @@ -6717,14 +6571,20 @@ define <2 x half> @v_contract_mul_add_v2f16_select_2_4(<2 x i32> %arg, <2 x half ; GFX11-GISEL-LABEL: v_contract_mul_add_v2f16_select_2_4: ; GFX11-GISEL: ; %bb.0: ; GFX11-GISEL-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX11-GISEL-NEXT: v_mov_b32_e32 v4, 0x4400 ; GFX11-GISEL-NEXT: v_cmp_eq_u32_e32 vcc_lo, 0, v0 -; GFX11-GISEL-NEXT: v_cndmask_b32_e64 v0, v4, 0x4000, vcc_lo +; GFX11-GISEL-NEXT: v_lshrrev_b32_e32 v5, 16, v2 +; 
GFX11-GISEL-NEXT: v_cndmask_b32_e64 v0, 0, -1, vcc_lo ; GFX11-GISEL-NEXT: v_cmp_eq_u32_e32 vcc_lo, 0, v1 +; GFX11-GISEL-NEXT: v_add_nc_u32_e32 v0, 2, v0 +; GFX11-GISEL-NEXT: v_cndmask_b32_e64 v1, 0, -1, vcc_lo +; GFX11-GISEL-NEXT: v_dual_mov_b32 v4, 0x7fff :: v_dual_add_nc_u32 v1, 2, v1 +; GFX11-GISEL-NEXT: v_med3_i32 v0, 0xffff8000, v0, v4 +; GFX11-GISEL-NEXT: v_med3_i32 v1, 0xffff8000, v1, v4 +; GFX11-GISEL-NEXT: v_ldexp_f16_e32 v0, v2, v0 +; GFX11-GISEL-NEXT: v_ldexp_f16_e32 v1, v5, v1 ; GFX11-GISEL-NEXT: v_and_b32_e32 v0, 0xffff, v0 -; GFX11-GISEL-NEXT: v_cndmask_b32_e64 v1, v4, 0x4000, vcc_lo ; GFX11-GISEL-NEXT: v_lshl_or_b32 v0, v1, 16, v0 -; GFX11-GISEL-NEXT: v_pk_fma_f16 v0, v2, v0, v3 +; GFX11-GISEL-NEXT: v_pk_add_f16 v0, v0, v3 ; GFX11-GISEL-NEXT: s_setpc_b64 s[30:31] %cond = icmp eq <2 x i32> %arg, zeroinitializer %select.pow2 = select <2 x i1> %cond, <2 x half> , <2 x half> @@ -6750,15 +6610,14 @@ define <2 x half> @v_contract_mul_add_v2f16_select_4_128(<2 x i32> %arg, <2 x ha ; GFX9-GISEL-LABEL: v_contract_mul_add_v2f16_select_4_128: ; GFX9-GISEL: ; %bb.0: ; GFX9-GISEL-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX9-GISEL-NEXT: v_mov_b32_e32 v4, 0x4400 -; GFX9-GISEL-NEXT: v_mov_b32_e32 v5, 0x5800 ; GFX9-GISEL-NEXT: v_cmp_eq_u32_e32 vcc, 0, v0 -; GFX9-GISEL-NEXT: v_cndmask_b32_e32 v0, v5, v4, vcc +; GFX9-GISEL-NEXT: v_cndmask_b32_e64 v0, 7, 2, vcc ; GFX9-GISEL-NEXT: v_cmp_eq_u32_e32 vcc, 0, v1 -; GFX9-GISEL-NEXT: v_cndmask_b32_e32 v1, v5, v4, vcc -; GFX9-GISEL-NEXT: v_and_b32_e32 v0, 0xffff, v0 +; GFX9-GISEL-NEXT: v_cndmask_b32_e64 v1, 7, 2, vcc +; GFX9-GISEL-NEXT: v_ldexp_f16_e32 v0, v2, v0 +; GFX9-GISEL-NEXT: v_ldexp_f16_sdwa v1, v2, v1 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:WORD_1 src1_sel:DWORD ; GFX9-GISEL-NEXT: v_lshl_or_b32 v0, v1, 16, v0 -; GFX9-GISEL-NEXT: v_pk_fma_f16 v0, v2, v0, v3 +; GFX9-GISEL-NEXT: v_pk_add_f16 v0, v0, v3 ; GFX9-GISEL-NEXT: s_setpc_b64 s[30:31] ; ; GFX10-SDAG-LABEL: v_contract_mul_add_v2f16_select_4_128: @@ 
-6776,14 +6635,15 @@ define <2 x half> @v_contract_mul_add_v2f16_select_4_128(<2 x i32> %arg, <2 x ha ; GFX10-GISEL-LABEL: v_contract_mul_add_v2f16_select_4_128: ; GFX10-GISEL: ; %bb.0: ; GFX10-GISEL-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX10-GISEL-NEXT: v_mov_b32_e32 v4, 0x5800 ; GFX10-GISEL-NEXT: v_cmp_eq_u32_e32 vcc_lo, 0, v0 -; GFX10-GISEL-NEXT: v_cndmask_b32_e64 v0, v4, 0x4400, vcc_lo +; GFX10-GISEL-NEXT: v_cndmask_b32_e64 v0, 7, 2, vcc_lo ; GFX10-GISEL-NEXT: v_cmp_eq_u32_e32 vcc_lo, 0, v1 +; GFX10-GISEL-NEXT: v_ldexp_f16_e32 v0, v2, v0 +; GFX10-GISEL-NEXT: v_cndmask_b32_e64 v1, 7, 2, vcc_lo ; GFX10-GISEL-NEXT: v_and_b32_e32 v0, 0xffff, v0 -; GFX10-GISEL-NEXT: v_cndmask_b32_e64 v1, v4, 0x4400, vcc_lo +; GFX10-GISEL-NEXT: v_ldexp_f16_sdwa v1, v2, v1 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:WORD_1 src1_sel:DWORD ; GFX10-GISEL-NEXT: v_lshl_or_b32 v0, v1, 16, v0 -; GFX10-GISEL-NEXT: v_pk_fma_f16 v0, v2, v0, v3 +; GFX10-GISEL-NEXT: v_pk_add_f16 v0, v0, v3 ; GFX10-GISEL-NEXT: s_setpc_b64 s[30:31] ; ; GFX11-SDAG-LABEL: v_contract_mul_add_v2f16_select_4_128: @@ -6801,14 +6661,16 @@ define <2 x half> @v_contract_mul_add_v2f16_select_4_128(<2 x i32> %arg, <2 x ha ; GFX11-GISEL-LABEL: v_contract_mul_add_v2f16_select_4_128: ; GFX11-GISEL: ; %bb.0: ; GFX11-GISEL-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX11-GISEL-NEXT: v_mov_b32_e32 v4, 0x5800 ; GFX11-GISEL-NEXT: v_cmp_eq_u32_e32 vcc_lo, 0, v0 -; GFX11-GISEL-NEXT: v_cndmask_b32_e64 v0, v4, 0x4400, vcc_lo +; GFX11-GISEL-NEXT: v_lshrrev_b32_e32 v4, 16, v2 +; GFX11-GISEL-NEXT: v_cndmask_b32_e64 v0, 7, 2, vcc_lo ; GFX11-GISEL-NEXT: v_cmp_eq_u32_e32 vcc_lo, 0, v1 +; GFX11-GISEL-NEXT: v_ldexp_f16_e32 v0, v2, v0 +; GFX11-GISEL-NEXT: v_cndmask_b32_e64 v1, 7, 2, vcc_lo ; GFX11-GISEL-NEXT: v_and_b32_e32 v0, 0xffff, v0 -; GFX11-GISEL-NEXT: v_cndmask_b32_e64 v1, v4, 0x4400, vcc_lo +; GFX11-GISEL-NEXT: v_ldexp_f16_e32 v1, v4, v1 ; GFX11-GISEL-NEXT: v_lshl_or_b32 v0, v1, 16, v0 -; GFX11-GISEL-NEXT: v_pk_fma_f16 
v0, v2, v0, v3 +; GFX11-GISEL-NEXT: v_pk_add_f16 v0, v0, v3 ; GFX11-GISEL-NEXT: s_setpc_b64 s[30:31] %cond = icmp eq <2 x i32> %arg, zeroinitializer %select.pow2 = select <2 x i1> %cond, <2 x half> , <2 x half> diff --git a/llvm/test/CodeGen/AMDGPU/fsqrt.f32.ll b/llvm/test/CodeGen/AMDGPU/fsqrt.f32.ll index b3001819e9aaf..c1d5b5857b6b5 100644 --- a/llvm/test/CodeGen/AMDGPU/fsqrt.f32.ll +++ b/llvm/test/CodeGen/AMDGPU/fsqrt.f32.ll @@ -2380,14 +2380,12 @@ define float @v_sqrt_f32_ulp2_contractable_rcp(float %x) { ; GISEL-IEEE: ; %bb.0: ; GISEL-IEEE-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) ; GISEL-IEEE-NEXT: v_mov_b32_e32 v1, 0x800000 -; GISEL-IEEE-NEXT: v_mov_b32_e32 v2, 0x4b800000 ; GISEL-IEEE-NEXT: v_cmp_lt_f32_e32 vcc, v0, v1 -; GISEL-IEEE-NEXT: v_cndmask_b32_e32 v1, 1.0, v2, vcc -; GISEL-IEEE-NEXT: v_mul_f32_e32 v0, v0, v1 +; GISEL-IEEE-NEXT: v_cndmask_b32_e64 v1, 0, 24, vcc +; GISEL-IEEE-NEXT: v_ldexp_f32_e32 v0, v0, v1 ; GISEL-IEEE-NEXT: v_rsq_f32_e32 v0, v0 -; GISEL-IEEE-NEXT: v_mov_b32_e32 v1, 0x45800000 -; GISEL-IEEE-NEXT: v_cndmask_b32_e32 v1, 1.0, v1, vcc -; GISEL-IEEE-NEXT: v_mul_f32_e32 v0, v0, v1 +; GISEL-IEEE-NEXT: v_cndmask_b32_e64 v1, 0, 12, vcc +; GISEL-IEEE-NEXT: v_ldexp_f32_e32 v0, v0, v1 ; GISEL-IEEE-NEXT: s_setpc_b64 s[30:31] ; ; GCN-DAZ-LABEL: v_sqrt_f32_ulp2_contractable_rcp: @@ -2734,20 +2732,18 @@ define <2 x float> @v_sqrt_v2f32_ulp2_contractable_rcp(<2 x float> %x) { ; GISEL-IEEE: ; %bb.0: ; GISEL-IEEE-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) ; GISEL-IEEE-NEXT: v_mov_b32_e32 v2, 0x800000 -; GISEL-IEEE-NEXT: v_mov_b32_e32 v3, 0x4b800000 ; GISEL-IEEE-NEXT: v_cmp_lt_f32_e32 vcc, v0, v2 -; GISEL-IEEE-NEXT: v_cndmask_b32_e32 v4, 1.0, v3, vcc +; GISEL-IEEE-NEXT: v_cndmask_b32_e64 v3, 0, 24, vcc ; GISEL-IEEE-NEXT: v_cmp_lt_f32_e64 s[4:5], v1, v2 -; GISEL-IEEE-NEXT: v_mul_f32_e32 v0, v0, v4 -; GISEL-IEEE-NEXT: v_cndmask_b32_e64 v2, 1.0, v3, s[4:5] +; GISEL-IEEE-NEXT: v_ldexp_f32_e32 v0, v0, v3 +; GISEL-IEEE-NEXT: v_cndmask_b32_e64 v2, 0, 
24, s[4:5] ; GISEL-IEEE-NEXT: v_rsq_f32_e32 v0, v0 -; GISEL-IEEE-NEXT: v_mul_f32_e32 v1, v1, v2 +; GISEL-IEEE-NEXT: v_ldexp_f32_e32 v1, v1, v2 ; GISEL-IEEE-NEXT: v_rsq_f32_e32 v1, v1 -; GISEL-IEEE-NEXT: v_mov_b32_e32 v4, 0x45800000 -; GISEL-IEEE-NEXT: v_cndmask_b32_e32 v2, 1.0, v4, vcc -; GISEL-IEEE-NEXT: v_mul_f32_e32 v0, v0, v2 -; GISEL-IEEE-NEXT: v_cndmask_b32_e64 v2, 1.0, v4, s[4:5] -; GISEL-IEEE-NEXT: v_mul_f32_e32 v1, v1, v2 +; GISEL-IEEE-NEXT: v_cndmask_b32_e64 v2, 0, 12, vcc +; GISEL-IEEE-NEXT: v_ldexp_f32_e32 v0, v0, v2 +; GISEL-IEEE-NEXT: v_cndmask_b32_e64 v2, 0, 12, s[4:5] +; GISEL-IEEE-NEXT: v_ldexp_f32_e32 v1, v1, v2 ; GISEL-IEEE-NEXT: s_setpc_b64 s[30:31] ; ; GCN-DAZ-LABEL: v_sqrt_v2f32_ulp2_contractable_rcp: diff --git a/llvm/test/CodeGen/AMDGPU/llvm.exp2.ll b/llvm/test/CodeGen/AMDGPU/llvm.exp2.ll index ac515808a0d8a..333d428c84bcc 100644 --- a/llvm/test/CodeGen/AMDGPU/llvm.exp2.ll +++ b/llvm/test/CodeGen/AMDGPU/llvm.exp2.ll @@ -41,10 +41,10 @@ define amdgpu_kernel void @s_exp2_f32(ptr addrspace(1) %out, float %in) { ; SI-GISEL-NEXT: v_cndmask_b32_e32 v0, 0, v1, vcc ; SI-GISEL-NEXT: v_add_f32_e32 v0, s2, v0 ; SI-GISEL-NEXT: v_exp_f32_e32 v0, v0 -; SI-GISEL-NEXT: v_mov_b32_e32 v1, 0x1f800000 -; SI-GISEL-NEXT: v_cndmask_b32_e32 v1, 1.0, v1, vcc +; SI-GISEL-NEXT: v_not_b32_e32 v1, 63 +; SI-GISEL-NEXT: v_cndmask_b32_e32 v1, 0, v1, vcc ; SI-GISEL-NEXT: s_mov_b32 s2, -1 -; SI-GISEL-NEXT: v_mul_f32_e32 v0, v0, v1 +; SI-GISEL-NEXT: v_ldexp_f32_e32 v0, v0, v1 ; SI-GISEL-NEXT: buffer_store_dword v0, off, s[0:3], 0 ; SI-GISEL-NEXT: s_endpgm ; @@ -78,9 +78,9 @@ define amdgpu_kernel void @s_exp2_f32(ptr addrspace(1) %out, float %in) { ; VI-GISEL-NEXT: v_cndmask_b32_e32 v0, 0, v1, vcc ; VI-GISEL-NEXT: v_add_f32_e32 v0, s2, v0 ; VI-GISEL-NEXT: v_exp_f32_e32 v0, v0 -; VI-GISEL-NEXT: v_mov_b32_e32 v1, 0x1f800000 -; VI-GISEL-NEXT: v_cndmask_b32_e32 v1, 1.0, v1, vcc -; VI-GISEL-NEXT: v_mul_f32_e32 v2, v0, v1 +; VI-GISEL-NEXT: v_not_b32_e32 v1, 63 +; VI-GISEL-NEXT: 
v_cndmask_b32_e32 v1, 0, v1, vcc +; VI-GISEL-NEXT: v_ldexp_f32 v2, v0, v1 ; VI-GISEL-NEXT: v_mov_b32_e32 v0, s0 ; VI-GISEL-NEXT: v_mov_b32_e32 v1, s1 ; VI-GISEL-NEXT: flat_store_dword v[0:1], v2 @@ -115,9 +115,9 @@ define amdgpu_kernel void @s_exp2_f32(ptr addrspace(1) %out, float %in) { ; GFX900-GISEL-NEXT: v_add_f32_e32 v0, s0, v0 ; GFX900-GISEL-NEXT: v_exp_f32_e32 v0, v0 ; GFX900-GISEL-NEXT: s_load_dwordx2 s[0:1], s[4:5], 0x24 -; GFX900-GISEL-NEXT: v_mov_b32_e32 v1, 0x1f800000 -; GFX900-GISEL-NEXT: v_cndmask_b32_e32 v1, 1.0, v1, vcc -; GFX900-GISEL-NEXT: v_mul_f32_e32 v0, v0, v1 +; GFX900-GISEL-NEXT: v_not_b32_e32 v1, 63 +; GFX900-GISEL-NEXT: v_cndmask_b32_e32 v1, 0, v1, vcc +; GFX900-GISEL-NEXT: v_ldexp_f32 v0, v0, v1 ; GFX900-GISEL-NEXT: v_mov_b32_e32 v1, 0 ; GFX900-GISEL-NEXT: s_waitcnt lgkmcnt(0) ; GFX900-GISEL-NEXT: global_store_dword v1, v0, s[0:1] @@ -203,7 +203,7 @@ define amdgpu_kernel void @s_exp2_v2f32(ptr addrspace(1) %out, <2 x float> %in) ; SI-GISEL-NEXT: s_load_dwordx4 s[4:7], s[4:5], 0x9 ; SI-GISEL-NEXT: v_mov_b32_e32 v0, 0xc2fc0000 ; SI-GISEL-NEXT: v_mov_b32_e32 v1, 0x42800000 -; SI-GISEL-NEXT: v_mov_b32_e32 v2, 0x1f800000 +; SI-GISEL-NEXT: v_not_b32_e32 v2, 63 ; SI-GISEL-NEXT: s_waitcnt lgkmcnt(0) ; SI-GISEL-NEXT: v_cmp_lt_f32_e32 vcc, s6, v0 ; SI-GISEL-NEXT: v_cmp_lt_f32_e64 s[0:1], s7, v0 @@ -213,10 +213,10 @@ define amdgpu_kernel void @s_exp2_v2f32(ptr addrspace(1) %out, <2 x float> %in) ; SI-GISEL-NEXT: v_add_f32_e32 v0, s7, v0 ; SI-GISEL-NEXT: v_exp_f32_e32 v3, v3 ; SI-GISEL-NEXT: v_exp_f32_e32 v1, v0 -; SI-GISEL-NEXT: v_cndmask_b32_e32 v0, 1.0, v2, vcc -; SI-GISEL-NEXT: v_cndmask_b32_e64 v2, 1.0, v2, s[0:1] -; SI-GISEL-NEXT: v_mul_f32_e32 v0, v3, v0 -; SI-GISEL-NEXT: v_mul_f32_e32 v1, v1, v2 +; SI-GISEL-NEXT: v_cndmask_b32_e32 v0, 0, v2, vcc +; SI-GISEL-NEXT: v_cndmask_b32_e64 v2, 0, v2, s[0:1] +; SI-GISEL-NEXT: v_ldexp_f32_e32 v0, v3, v0 +; SI-GISEL-NEXT: v_ldexp_f32_e32 v1, v1, v2 ; SI-GISEL-NEXT: s_mov_b32 s6, -1 ; SI-GISEL-NEXT: 
s_mov_b32 s7, 0xf000 ; SI-GISEL-NEXT: buffer_store_dwordx2 v[0:1], off, s[4:7], 0 @@ -252,7 +252,7 @@ define amdgpu_kernel void @s_exp2_v2f32(ptr addrspace(1) %out, <2 x float> %in) ; VI-GISEL-NEXT: s_load_dwordx4 s[4:7], s[4:5], 0x24 ; VI-GISEL-NEXT: v_mov_b32_e32 v0, 0xc2fc0000 ; VI-GISEL-NEXT: v_mov_b32_e32 v1, 0x42800000 -; VI-GISEL-NEXT: v_mov_b32_e32 v2, 0x1f800000 +; VI-GISEL-NEXT: v_not_b32_e32 v2, 63 ; VI-GISEL-NEXT: s_waitcnt lgkmcnt(0) ; VI-GISEL-NEXT: v_cmp_lt_f32_e32 vcc, s6, v0 ; VI-GISEL-NEXT: v_cmp_lt_f32_e64 s[0:1], s7, v0 @@ -262,10 +262,10 @@ define amdgpu_kernel void @s_exp2_v2f32(ptr addrspace(1) %out, <2 x float> %in) ; VI-GISEL-NEXT: v_add_f32_e32 v0, s7, v0 ; VI-GISEL-NEXT: v_exp_f32_e32 v3, v3 ; VI-GISEL-NEXT: v_exp_f32_e32 v1, v0 -; VI-GISEL-NEXT: v_cndmask_b32_e32 v0, 1.0, v2, vcc -; VI-GISEL-NEXT: v_cndmask_b32_e64 v2, 1.0, v2, s[0:1] -; VI-GISEL-NEXT: v_mul_f32_e32 v0, v3, v0 -; VI-GISEL-NEXT: v_mul_f32_e32 v1, v1, v2 +; VI-GISEL-NEXT: v_cndmask_b32_e32 v0, 0, v2, vcc +; VI-GISEL-NEXT: v_cndmask_b32_e64 v2, 0, v2, s[0:1] +; VI-GISEL-NEXT: v_ldexp_f32 v0, v3, v0 +; VI-GISEL-NEXT: v_ldexp_f32 v1, v1, v2 ; VI-GISEL-NEXT: v_mov_b32_e32 v2, s4 ; VI-GISEL-NEXT: v_mov_b32_e32 v3, s5 ; VI-GISEL-NEXT: flat_store_dwordx2 v[2:3], v[0:1] @@ -300,7 +300,7 @@ define amdgpu_kernel void @s_exp2_v2f32(ptr addrspace(1) %out, <2 x float> %in) ; GFX900-GISEL-NEXT: s_load_dwordx4 s[8:11], s[4:5], 0x24 ; GFX900-GISEL-NEXT: v_mov_b32_e32 v0, 0xc2fc0000 ; GFX900-GISEL-NEXT: v_mov_b32_e32 v1, 0x42800000 -; GFX900-GISEL-NEXT: v_mov_b32_e32 v2, 0x1f800000 +; GFX900-GISEL-NEXT: v_not_b32_e32 v2, 63 ; GFX900-GISEL-NEXT: s_waitcnt lgkmcnt(0) ; GFX900-GISEL-NEXT: v_cmp_lt_f32_e32 vcc, s10, v0 ; GFX900-GISEL-NEXT: v_cmp_lt_f32_e64 s[0:1], s11, v0 @@ -310,10 +310,10 @@ define amdgpu_kernel void @s_exp2_v2f32(ptr addrspace(1) %out, <2 x float> %in) ; GFX900-GISEL-NEXT: v_add_f32_e32 v0, s11, v0 ; GFX900-GISEL-NEXT: v_exp_f32_e32 v3, v3 ; GFX900-GISEL-NEXT: v_exp_f32_e32 
v1, v0 -; GFX900-GISEL-NEXT: v_cndmask_b32_e32 v0, 1.0, v2, vcc -; GFX900-GISEL-NEXT: v_cndmask_b32_e64 v2, 1.0, v2, s[0:1] -; GFX900-GISEL-NEXT: v_mul_f32_e32 v0, v3, v0 -; GFX900-GISEL-NEXT: v_mul_f32_e32 v1, v1, v2 +; GFX900-GISEL-NEXT: v_cndmask_b32_e32 v0, 0, v2, vcc +; GFX900-GISEL-NEXT: v_cndmask_b32_e64 v2, 0, v2, s[0:1] +; GFX900-GISEL-NEXT: v_ldexp_f32 v0, v3, v0 +; GFX900-GISEL-NEXT: v_ldexp_f32 v1, v1, v2 ; GFX900-GISEL-NEXT: v_mov_b32_e32 v2, 0 ; GFX900-GISEL-NEXT: global_store_dwordx2 v2, v[0:1], s[8:9] ; GFX900-GISEL-NEXT: s_endpgm @@ -421,17 +421,17 @@ define amdgpu_kernel void @s_exp2_v3f32(ptr addrspace(1) %out, <3 x float> %in) ; SI-GISEL-NEXT: s_load_dwordx4 s[0:3], s[4:5], 0xd ; SI-GISEL-NEXT: v_mov_b32_e32 v1, 0xc2fc0000 ; SI-GISEL-NEXT: v_mov_b32_e32 v2, 0x42800000 -; SI-GISEL-NEXT: v_mov_b32_e32 v3, 0x1f800000 +; SI-GISEL-NEXT: v_not_b32_e32 v3, 63 ; SI-GISEL-NEXT: s_load_dwordx2 s[4:5], s[4:5], 0x9 ; SI-GISEL-NEXT: s_waitcnt lgkmcnt(0) ; SI-GISEL-NEXT: v_cmp_lt_f32_e32 vcc, s0, v1 ; SI-GISEL-NEXT: v_cndmask_b32_e32 v0, 0, v2, vcc ; SI-GISEL-NEXT: v_add_f32_e32 v0, s0, v0 ; SI-GISEL-NEXT: v_exp_f32_e32 v0, v0 -; SI-GISEL-NEXT: v_cndmask_b32_e32 v4, 1.0, v3, vcc +; SI-GISEL-NEXT: v_cndmask_b32_e32 v4, 0, v3, vcc ; SI-GISEL-NEXT: v_cmp_lt_f32_e32 vcc, s1, v1 ; SI-GISEL-NEXT: s_mov_b32 s6, -1 -; SI-GISEL-NEXT: v_mul_f32_e32 v0, v0, v4 +; SI-GISEL-NEXT: v_ldexp_f32_e32 v0, v0, v4 ; SI-GISEL-NEXT: v_cndmask_b32_e32 v4, 0, v2, vcc ; SI-GISEL-NEXT: v_add_f32_e32 v4, s1, v4 ; SI-GISEL-NEXT: v_cmp_lt_f32_e64 s[0:1], s2, v1 @@ -439,11 +439,11 @@ define amdgpu_kernel void @s_exp2_v3f32(ptr addrspace(1) %out, <3 x float> %in) ; SI-GISEL-NEXT: v_exp_f32_e32 v4, v4 ; SI-GISEL-NEXT: v_add_f32_e32 v1, s2, v1 ; SI-GISEL-NEXT: v_exp_f32_e32 v2, v1 -; SI-GISEL-NEXT: v_cndmask_b32_e32 v1, 1.0, v3, vcc -; SI-GISEL-NEXT: v_mul_f32_e32 v1, v4, v1 -; SI-GISEL-NEXT: v_cndmask_b32_e64 v3, 1.0, v3, s[0:1] +; SI-GISEL-NEXT: v_cndmask_b32_e32 v1, 0, v3, vcc +; 
SI-GISEL-NEXT: v_ldexp_f32_e32 v1, v4, v1 +; SI-GISEL-NEXT: v_cndmask_b32_e64 v3, 0, v3, s[0:1] ; SI-GISEL-NEXT: s_mov_b32 s7, 0xf000 -; SI-GISEL-NEXT: v_mul_f32_e32 v2, v2, v3 +; SI-GISEL-NEXT: v_ldexp_f32_e32 v2, v2, v3 ; SI-GISEL-NEXT: buffer_store_dwordx2 v[0:1], off, s[4:7], 0 ; SI-GISEL-NEXT: buffer_store_dword v2, off, s[4:7], 0 offset:8 ; SI-GISEL-NEXT: s_endpgm @@ -487,16 +487,16 @@ define amdgpu_kernel void @s_exp2_v3f32(ptr addrspace(1) %out, <3 x float> %in) ; VI-GISEL-NEXT: s_load_dwordx4 s[0:3], s[4:5], 0x34 ; VI-GISEL-NEXT: v_mov_b32_e32 v1, 0xc2fc0000 ; VI-GISEL-NEXT: v_mov_b32_e32 v2, 0x42800000 -; VI-GISEL-NEXT: v_mov_b32_e32 v3, 0x1f800000 +; VI-GISEL-NEXT: v_not_b32_e32 v3, 63 ; VI-GISEL-NEXT: s_load_dwordx2 s[4:5], s[4:5], 0x24 ; VI-GISEL-NEXT: s_waitcnt lgkmcnt(0) ; VI-GISEL-NEXT: v_cmp_lt_f32_e32 vcc, s0, v1 ; VI-GISEL-NEXT: v_cndmask_b32_e32 v0, 0, v2, vcc ; VI-GISEL-NEXT: v_add_f32_e32 v0, s0, v0 ; VI-GISEL-NEXT: v_exp_f32_e32 v0, v0 -; VI-GISEL-NEXT: v_cndmask_b32_e32 v4, 1.0, v3, vcc +; VI-GISEL-NEXT: v_cndmask_b32_e32 v4, 0, v3, vcc ; VI-GISEL-NEXT: v_cmp_lt_f32_e32 vcc, s1, v1 -; VI-GISEL-NEXT: v_mul_f32_e32 v0, v0, v4 +; VI-GISEL-NEXT: v_ldexp_f32 v0, v0, v4 ; VI-GISEL-NEXT: v_cndmask_b32_e32 v4, 0, v2, vcc ; VI-GISEL-NEXT: v_add_f32_e32 v4, s1, v4 ; VI-GISEL-NEXT: v_cmp_lt_f32_e64 s[0:1], s2, v1 @@ -504,10 +504,10 @@ define amdgpu_kernel void @s_exp2_v3f32(ptr addrspace(1) %out, <3 x float> %in) ; VI-GISEL-NEXT: v_add_f32_e32 v1, s2, v1 ; VI-GISEL-NEXT: v_exp_f32_e32 v4, v4 ; VI-GISEL-NEXT: v_exp_f32_e32 v2, v1 -; VI-GISEL-NEXT: v_cndmask_b32_e32 v1, 1.0, v3, vcc -; VI-GISEL-NEXT: v_cndmask_b32_e64 v3, 1.0, v3, s[0:1] -; VI-GISEL-NEXT: v_mul_f32_e32 v1, v4, v1 -; VI-GISEL-NEXT: v_mul_f32_e32 v2, v2, v3 +; VI-GISEL-NEXT: v_cndmask_b32_e32 v1, 0, v3, vcc +; VI-GISEL-NEXT: v_cndmask_b32_e64 v3, 0, v3, s[0:1] +; VI-GISEL-NEXT: v_ldexp_f32 v1, v4, v1 +; VI-GISEL-NEXT: v_ldexp_f32 v2, v2, v3 ; VI-GISEL-NEXT: v_mov_b32_e32 v3, s4 ; 
VI-GISEL-NEXT: v_mov_b32_e32 v4, s5 ; VI-GISEL-NEXT: flat_store_dwordx3 v[3:4], v[0:2] @@ -551,15 +551,15 @@ define amdgpu_kernel void @s_exp2_v3f32(ptr addrspace(1) %out, <3 x float> %in) ; GFX900-GISEL-NEXT: s_load_dwordx2 s[6:7], s[4:5], 0x24 ; GFX900-GISEL-NEXT: v_mov_b32_e32 v1, 0xc2fc0000 ; GFX900-GISEL-NEXT: v_mov_b32_e32 v2, 0x42800000 -; GFX900-GISEL-NEXT: v_mov_b32_e32 v3, 0x1f800000 +; GFX900-GISEL-NEXT: v_not_b32_e32 v3, 63 ; GFX900-GISEL-NEXT: s_waitcnt lgkmcnt(0) ; GFX900-GISEL-NEXT: v_cmp_lt_f32_e32 vcc, s0, v1 ; GFX900-GISEL-NEXT: v_cndmask_b32_e32 v0, 0, v2, vcc ; GFX900-GISEL-NEXT: v_add_f32_e32 v0, s0, v0 ; GFX900-GISEL-NEXT: v_exp_f32_e32 v0, v0 -; GFX900-GISEL-NEXT: v_cndmask_b32_e32 v4, 1.0, v3, vcc +; GFX900-GISEL-NEXT: v_cndmask_b32_e32 v4, 0, v3, vcc ; GFX900-GISEL-NEXT: v_cmp_lt_f32_e32 vcc, s1, v1 -; GFX900-GISEL-NEXT: v_mul_f32_e32 v0, v0, v4 +; GFX900-GISEL-NEXT: v_ldexp_f32 v0, v0, v4 ; GFX900-GISEL-NEXT: v_cndmask_b32_e32 v4, 0, v2, vcc ; GFX900-GISEL-NEXT: v_add_f32_e32 v4, s1, v4 ; GFX900-GISEL-NEXT: v_cmp_lt_f32_e64 s[0:1], s2, v1 @@ -567,10 +567,10 @@ define amdgpu_kernel void @s_exp2_v3f32(ptr addrspace(1) %out, <3 x float> %in) ; GFX900-GISEL-NEXT: v_add_f32_e32 v1, s2, v1 ; GFX900-GISEL-NEXT: v_exp_f32_e32 v4, v4 ; GFX900-GISEL-NEXT: v_exp_f32_e32 v2, v1 -; GFX900-GISEL-NEXT: v_cndmask_b32_e32 v1, 1.0, v3, vcc -; GFX900-GISEL-NEXT: v_cndmask_b32_e64 v3, 1.0, v3, s[0:1] -; GFX900-GISEL-NEXT: v_mul_f32_e32 v1, v4, v1 -; GFX900-GISEL-NEXT: v_mul_f32_e32 v2, v2, v3 +; GFX900-GISEL-NEXT: v_cndmask_b32_e32 v1, 0, v3, vcc +; GFX900-GISEL-NEXT: v_cndmask_b32_e64 v3, 0, v3, s[0:1] +; GFX900-GISEL-NEXT: v_ldexp_f32 v1, v4, v1 +; GFX900-GISEL-NEXT: v_ldexp_f32 v2, v2, v3 ; GFX900-GISEL-NEXT: v_mov_b32_e32 v3, 0 ; GFX900-GISEL-NEXT: global_store_dwordx3 v3, v[0:2], s[6:7] ; GFX900-GISEL-NEXT: s_endpgm @@ -710,7 +710,7 @@ define amdgpu_kernel void @s_exp2_v4f32(ptr addrspace(1) %out, <4 x float> %in) ; SI-GISEL-NEXT: s_load_dwordx2 s[4:5], 
s[4:5], 0x9 ; SI-GISEL-NEXT: v_mov_b32_e32 v2, 0xc2fc0000 ; SI-GISEL-NEXT: v_mov_b32_e32 v3, 0x42800000 -; SI-GISEL-NEXT: v_mov_b32_e32 v4, 0x1f800000 +; SI-GISEL-NEXT: v_not_b32_e32 v4, 63 ; SI-GISEL-NEXT: s_waitcnt lgkmcnt(0) ; SI-GISEL-NEXT: v_cmp_lt_f32_e32 vcc, s8, v2 ; SI-GISEL-NEXT: v_cndmask_b32_e32 v0, 0, v3, vcc @@ -720,22 +720,22 @@ define amdgpu_kernel void @s_exp2_v4f32(ptr addrspace(1) %out, <4 x float> %in) ; SI-GISEL-NEXT: v_exp_f32_e32 v0, v0 ; SI-GISEL-NEXT: v_add_f32_e32 v1, s9, v1 ; SI-GISEL-NEXT: v_exp_f32_e32 v1, v1 -; SI-GISEL-NEXT: v_cndmask_b32_e32 v5, 1.0, v4, vcc -; SI-GISEL-NEXT: v_mul_f32_e32 v0, v0, v5 -; SI-GISEL-NEXT: v_cndmask_b32_e64 v5, 1.0, v4, s[0:1] +; SI-GISEL-NEXT: v_cndmask_b32_e32 v5, 0, v4, vcc +; SI-GISEL-NEXT: v_ldexp_f32_e32 v0, v0, v5 +; SI-GISEL-NEXT: v_cndmask_b32_e64 v5, 0, v4, s[0:1] ; SI-GISEL-NEXT: v_cmp_lt_f32_e32 vcc, s10, v2 ; SI-GISEL-NEXT: v_cmp_lt_f32_e64 s[0:1], s11, v2 -; SI-GISEL-NEXT: v_mul_f32_e32 v1, v1, v5 +; SI-GISEL-NEXT: v_ldexp_f32_e32 v1, v1, v5 ; SI-GISEL-NEXT: v_cndmask_b32_e32 v5, 0, v3, vcc ; SI-GISEL-NEXT: v_cndmask_b32_e64 v2, 0, v3, s[0:1] ; SI-GISEL-NEXT: v_add_f32_e32 v5, s10, v5 ; SI-GISEL-NEXT: v_add_f32_e32 v2, s11, v2 ; SI-GISEL-NEXT: v_exp_f32_e32 v5, v5 ; SI-GISEL-NEXT: v_exp_f32_e32 v3, v2 -; SI-GISEL-NEXT: v_cndmask_b32_e32 v2, 1.0, v4, vcc -; SI-GISEL-NEXT: v_cndmask_b32_e64 v4, 1.0, v4, s[0:1] -; SI-GISEL-NEXT: v_mul_f32_e32 v2, v5, v2 -; SI-GISEL-NEXT: v_mul_f32_e32 v3, v3, v4 +; SI-GISEL-NEXT: v_cndmask_b32_e32 v2, 0, v4, vcc +; SI-GISEL-NEXT: v_cndmask_b32_e64 v4, 0, v4, s[0:1] +; SI-GISEL-NEXT: v_ldexp_f32_e32 v2, v5, v2 +; SI-GISEL-NEXT: v_ldexp_f32_e32 v3, v3, v4 ; SI-GISEL-NEXT: s_mov_b32 s6, -1 ; SI-GISEL-NEXT: s_mov_b32 s7, 0xf000 ; SI-GISEL-NEXT: buffer_store_dwordx4 v[0:3], off, s[4:7], 0 @@ -787,7 +787,7 @@ define amdgpu_kernel void @s_exp2_v4f32(ptr addrspace(1) %out, <4 x float> %in) ; VI-GISEL-NEXT: s_load_dwordx2 s[2:3], s[4:5], 0x24 ; VI-GISEL-NEXT: 
v_mov_b32_e32 v2, 0xc2fc0000 ; VI-GISEL-NEXT: v_mov_b32_e32 v3, 0x42800000 -; VI-GISEL-NEXT: v_mov_b32_e32 v4, 0x1f800000 +; VI-GISEL-NEXT: v_not_b32_e32 v4, 63 ; VI-GISEL-NEXT: s_waitcnt lgkmcnt(0) ; VI-GISEL-NEXT: v_cmp_lt_f32_e32 vcc, s8, v2 ; VI-GISEL-NEXT: v_cndmask_b32_e32 v0, 0, v3, vcc @@ -797,22 +797,22 @@ define amdgpu_kernel void @s_exp2_v4f32(ptr addrspace(1) %out, <4 x float> %in) ; VI-GISEL-NEXT: v_exp_f32_e32 v0, v0 ; VI-GISEL-NEXT: v_add_f32_e32 v1, s9, v1 ; VI-GISEL-NEXT: v_exp_f32_e32 v1, v1 -; VI-GISEL-NEXT: v_cndmask_b32_e32 v5, 1.0, v4, vcc -; VI-GISEL-NEXT: v_mul_f32_e32 v0, v0, v5 -; VI-GISEL-NEXT: v_cndmask_b32_e64 v5, 1.0, v4, s[0:1] +; VI-GISEL-NEXT: v_cndmask_b32_e32 v5, 0, v4, vcc +; VI-GISEL-NEXT: v_ldexp_f32 v0, v0, v5 +; VI-GISEL-NEXT: v_cndmask_b32_e64 v5, 0, v4, s[0:1] ; VI-GISEL-NEXT: v_cmp_lt_f32_e32 vcc, s10, v2 ; VI-GISEL-NEXT: v_cmp_lt_f32_e64 s[0:1], s11, v2 -; VI-GISEL-NEXT: v_mul_f32_e32 v1, v1, v5 +; VI-GISEL-NEXT: v_ldexp_f32 v1, v1, v5 ; VI-GISEL-NEXT: v_cndmask_b32_e32 v5, 0, v3, vcc ; VI-GISEL-NEXT: v_cndmask_b32_e64 v2, 0, v3, s[0:1] ; VI-GISEL-NEXT: v_add_f32_e32 v5, s10, v5 ; VI-GISEL-NEXT: v_add_f32_e32 v2, s11, v2 ; VI-GISEL-NEXT: v_exp_f32_e32 v5, v5 ; VI-GISEL-NEXT: v_exp_f32_e32 v3, v2 -; VI-GISEL-NEXT: v_cndmask_b32_e32 v2, 1.0, v4, vcc -; VI-GISEL-NEXT: v_cndmask_b32_e64 v4, 1.0, v4, s[0:1] -; VI-GISEL-NEXT: v_mul_f32_e32 v2, v5, v2 -; VI-GISEL-NEXT: v_mul_f32_e32 v3, v3, v4 +; VI-GISEL-NEXT: v_cndmask_b32_e32 v2, 0, v4, vcc +; VI-GISEL-NEXT: v_cndmask_b32_e64 v4, 0, v4, s[0:1] +; VI-GISEL-NEXT: v_ldexp_f32 v2, v5, v2 +; VI-GISEL-NEXT: v_ldexp_f32 v3, v3, v4 ; VI-GISEL-NEXT: v_mov_b32_e32 v5, s3 ; VI-GISEL-NEXT: v_mov_b32_e32 v4, s2 ; VI-GISEL-NEXT: flat_store_dwordx4 v[4:5], v[0:3] @@ -863,7 +863,7 @@ define amdgpu_kernel void @s_exp2_v4f32(ptr addrspace(1) %out, <4 x float> %in) ; GFX900-GISEL-NEXT: s_load_dwordx2 s[2:3], s[4:5], 0x24 ; GFX900-GISEL-NEXT: v_mov_b32_e32 v2, 0xc2fc0000 ; GFX900-GISEL-NEXT: 
v_mov_b32_e32 v3, 0x42800000 -; GFX900-GISEL-NEXT: v_mov_b32_e32 v4, 0x1f800000 +; GFX900-GISEL-NEXT: v_not_b32_e32 v4, 63 ; GFX900-GISEL-NEXT: s_waitcnt lgkmcnt(0) ; GFX900-GISEL-NEXT: v_cmp_lt_f32_e32 vcc, s8, v2 ; GFX900-GISEL-NEXT: v_cndmask_b32_e32 v0, 0, v3, vcc @@ -873,22 +873,22 @@ define amdgpu_kernel void @s_exp2_v4f32(ptr addrspace(1) %out, <4 x float> %in) ; GFX900-GISEL-NEXT: v_exp_f32_e32 v0, v0 ; GFX900-GISEL-NEXT: v_add_f32_e32 v1, s9, v1 ; GFX900-GISEL-NEXT: v_exp_f32_e32 v1, v1 -; GFX900-GISEL-NEXT: v_cndmask_b32_e32 v5, 1.0, v4, vcc -; GFX900-GISEL-NEXT: v_mul_f32_e32 v0, v0, v5 -; GFX900-GISEL-NEXT: v_cndmask_b32_e64 v5, 1.0, v4, s[0:1] +; GFX900-GISEL-NEXT: v_cndmask_b32_e32 v5, 0, v4, vcc +; GFX900-GISEL-NEXT: v_ldexp_f32 v0, v0, v5 +; GFX900-GISEL-NEXT: v_cndmask_b32_e64 v5, 0, v4, s[0:1] ; GFX900-GISEL-NEXT: v_cmp_lt_f32_e32 vcc, s10, v2 ; GFX900-GISEL-NEXT: v_cmp_lt_f32_e64 s[0:1], s11, v2 -; GFX900-GISEL-NEXT: v_mul_f32_e32 v1, v1, v5 +; GFX900-GISEL-NEXT: v_ldexp_f32 v1, v1, v5 ; GFX900-GISEL-NEXT: v_cndmask_b32_e32 v5, 0, v3, vcc ; GFX900-GISEL-NEXT: v_cndmask_b32_e64 v2, 0, v3, s[0:1] ; GFX900-GISEL-NEXT: v_add_f32_e32 v5, s10, v5 ; GFX900-GISEL-NEXT: v_add_f32_e32 v2, s11, v2 ; GFX900-GISEL-NEXT: v_exp_f32_e32 v5, v5 ; GFX900-GISEL-NEXT: v_exp_f32_e32 v3, v2 -; GFX900-GISEL-NEXT: v_cndmask_b32_e32 v2, 1.0, v4, vcc -; GFX900-GISEL-NEXT: v_cndmask_b32_e64 v4, 1.0, v4, s[0:1] -; GFX900-GISEL-NEXT: v_mul_f32_e32 v2, v5, v2 -; GFX900-GISEL-NEXT: v_mul_f32_e32 v3, v3, v4 +; GFX900-GISEL-NEXT: v_cndmask_b32_e32 v2, 0, v4, vcc +; GFX900-GISEL-NEXT: v_cndmask_b32_e64 v4, 0, v4, s[0:1] +; GFX900-GISEL-NEXT: v_ldexp_f32 v2, v5, v2 +; GFX900-GISEL-NEXT: v_ldexp_f32 v3, v3, v4 ; GFX900-GISEL-NEXT: v_mov_b32_e32 v4, 0 ; GFX900-GISEL-NEXT: global_store_dwordx4 v4, v[0:3], s[2:3] ; GFX900-GISEL-NEXT: s_endpgm @@ -1006,19 +1006,19 @@ define float @v_exp2_f32(float %in) { ; SI-SDAG-NEXT: v_ldexp_f32_e32 v0, v0, v1 ; SI-SDAG-NEXT: s_setpc_b64 s[30:31] ; 
-; GCN-GISEL-LABEL: v_exp2_f32: -; GCN-GISEL: ; %bb.0: -; GCN-GISEL-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GCN-GISEL-NEXT: v_mov_b32_e32 v1, 0xc2fc0000 -; GCN-GISEL-NEXT: v_mov_b32_e32 v2, 0x42800000 -; GCN-GISEL-NEXT: v_cmp_lt_f32_e32 vcc, v0, v1 -; GCN-GISEL-NEXT: v_cndmask_b32_e32 v1, 0, v2, vcc -; GCN-GISEL-NEXT: v_add_f32_e32 v0, v0, v1 -; GCN-GISEL-NEXT: v_exp_f32_e32 v0, v0 -; GCN-GISEL-NEXT: v_mov_b32_e32 v1, 0x1f800000 -; GCN-GISEL-NEXT: v_cndmask_b32_e32 v1, 1.0, v1, vcc -; GCN-GISEL-NEXT: v_mul_f32_e32 v0, v0, v1 -; GCN-GISEL-NEXT: s_setpc_b64 s[30:31] +; SI-GISEL-LABEL: v_exp2_f32: +; SI-GISEL: ; %bb.0: +; SI-GISEL-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; SI-GISEL-NEXT: v_mov_b32_e32 v1, 0xc2fc0000 +; SI-GISEL-NEXT: v_mov_b32_e32 v2, 0x42800000 +; SI-GISEL-NEXT: v_cmp_lt_f32_e32 vcc, v0, v1 +; SI-GISEL-NEXT: v_cndmask_b32_e32 v1, 0, v2, vcc +; SI-GISEL-NEXT: v_add_f32_e32 v0, v0, v1 +; SI-GISEL-NEXT: v_exp_f32_e32 v0, v0 +; SI-GISEL-NEXT: v_not_b32_e32 v1, 63 +; SI-GISEL-NEXT: v_cndmask_b32_e32 v1, 0, v1, vcc +; SI-GISEL-NEXT: v_ldexp_f32_e32 v0, v0, v1 +; SI-GISEL-NEXT: s_setpc_b64 s[30:31] ; ; VI-SDAG-LABEL: v_exp2_f32: ; VI-SDAG: ; %bb.0: @@ -1034,6 +1034,20 @@ define float @v_exp2_f32(float %in) { ; VI-SDAG-NEXT: v_ldexp_f32 v0, v0, v1 ; VI-SDAG-NEXT: s_setpc_b64 s[30:31] ; +; VI-GISEL-LABEL: v_exp2_f32: +; VI-GISEL: ; %bb.0: +; VI-GISEL-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; VI-GISEL-NEXT: v_mov_b32_e32 v1, 0xc2fc0000 +; VI-GISEL-NEXT: v_mov_b32_e32 v2, 0x42800000 +; VI-GISEL-NEXT: v_cmp_lt_f32_e32 vcc, v0, v1 +; VI-GISEL-NEXT: v_cndmask_b32_e32 v1, 0, v2, vcc +; VI-GISEL-NEXT: v_add_f32_e32 v0, v0, v1 +; VI-GISEL-NEXT: v_exp_f32_e32 v0, v0 +; VI-GISEL-NEXT: v_not_b32_e32 v1, 63 +; VI-GISEL-NEXT: v_cndmask_b32_e32 v1, 0, v1, vcc +; VI-GISEL-NEXT: v_ldexp_f32 v0, v0, v1 +; VI-GISEL-NEXT: s_setpc_b64 s[30:31] +; ; GFX900-SDAG-LABEL: v_exp2_f32: ; GFX900-SDAG: ; %bb.0: ; GFX900-SDAG-NEXT: s_waitcnt vmcnt(0) expcnt(0) 
lgkmcnt(0) @@ -1048,6 +1062,20 @@ define float @v_exp2_f32(float %in) { ; GFX900-SDAG-NEXT: v_ldexp_f32 v0, v0, v1 ; GFX900-SDAG-NEXT: s_setpc_b64 s[30:31] ; +; GFX900-GISEL-LABEL: v_exp2_f32: +; GFX900-GISEL: ; %bb.0: +; GFX900-GISEL-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX900-GISEL-NEXT: v_mov_b32_e32 v1, 0xc2fc0000 +; GFX900-GISEL-NEXT: v_mov_b32_e32 v2, 0x42800000 +; GFX900-GISEL-NEXT: v_cmp_lt_f32_e32 vcc, v0, v1 +; GFX900-GISEL-NEXT: v_cndmask_b32_e32 v1, 0, v2, vcc +; GFX900-GISEL-NEXT: v_add_f32_e32 v0, v0, v1 +; GFX900-GISEL-NEXT: v_exp_f32_e32 v0, v0 +; GFX900-GISEL-NEXT: v_not_b32_e32 v1, 63 +; GFX900-GISEL-NEXT: v_cndmask_b32_e32 v1, 0, v1, vcc +; GFX900-GISEL-NEXT: v_ldexp_f32 v0, v0, v1 +; GFX900-GISEL-NEXT: s_setpc_b64 s[30:31] +; ; R600-LABEL: v_exp2_f32: ; R600: ; %bb.0: ; R600-NEXT: CF_END @@ -1076,19 +1104,19 @@ define float @v_exp2_fabs_f32(float %in) { ; SI-SDAG-NEXT: v_ldexp_f32_e32 v0, v0, v1 ; SI-SDAG-NEXT: s_setpc_b64 s[30:31] ; -; GCN-GISEL-LABEL: v_exp2_fabs_f32: -; GCN-GISEL: ; %bb.0: -; GCN-GISEL-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GCN-GISEL-NEXT: v_mov_b32_e32 v1, 0xc2fc0000 -; GCN-GISEL-NEXT: v_mov_b32_e32 v2, 0x42800000 -; GCN-GISEL-NEXT: v_cmp_lt_f32_e64 vcc, |v0|, v1 -; GCN-GISEL-NEXT: v_cndmask_b32_e32 v1, 0, v2, vcc -; GCN-GISEL-NEXT: v_add_f32_e64 v0, |v0|, v1 -; GCN-GISEL-NEXT: v_exp_f32_e32 v0, v0 -; GCN-GISEL-NEXT: v_mov_b32_e32 v1, 0x1f800000 -; GCN-GISEL-NEXT: v_cndmask_b32_e32 v1, 1.0, v1, vcc -; GCN-GISEL-NEXT: v_mul_f32_e32 v0, v0, v1 -; GCN-GISEL-NEXT: s_setpc_b64 s[30:31] +; SI-GISEL-LABEL: v_exp2_fabs_f32: +; SI-GISEL: ; %bb.0: +; SI-GISEL-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; SI-GISEL-NEXT: v_mov_b32_e32 v1, 0xc2fc0000 +; SI-GISEL-NEXT: v_mov_b32_e32 v2, 0x42800000 +; SI-GISEL-NEXT: v_cmp_lt_f32_e64 vcc, |v0|, v1 +; SI-GISEL-NEXT: v_cndmask_b32_e32 v1, 0, v2, vcc +; SI-GISEL-NEXT: v_add_f32_e64 v0, |v0|, v1 +; SI-GISEL-NEXT: v_exp_f32_e32 v0, v0 +; SI-GISEL-NEXT: v_not_b32_e32 v1, 
63 +; SI-GISEL-NEXT: v_cndmask_b32_e32 v1, 0, v1, vcc +; SI-GISEL-NEXT: v_ldexp_f32_e32 v0, v0, v1 +; SI-GISEL-NEXT: s_setpc_b64 s[30:31] ; ; VI-SDAG-LABEL: v_exp2_fabs_f32: ; VI-SDAG: ; %bb.0: @@ -1104,6 +1132,20 @@ define float @v_exp2_fabs_f32(float %in) { ; VI-SDAG-NEXT: v_ldexp_f32 v0, v0, v1 ; VI-SDAG-NEXT: s_setpc_b64 s[30:31] ; +; VI-GISEL-LABEL: v_exp2_fabs_f32: +; VI-GISEL: ; %bb.0: +; VI-GISEL-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; VI-GISEL-NEXT: v_mov_b32_e32 v1, 0xc2fc0000 +; VI-GISEL-NEXT: v_mov_b32_e32 v2, 0x42800000 +; VI-GISEL-NEXT: v_cmp_lt_f32_e64 vcc, |v0|, v1 +; VI-GISEL-NEXT: v_cndmask_b32_e32 v1, 0, v2, vcc +; VI-GISEL-NEXT: v_add_f32_e64 v0, |v0|, v1 +; VI-GISEL-NEXT: v_exp_f32_e32 v0, v0 +; VI-GISEL-NEXT: v_not_b32_e32 v1, 63 +; VI-GISEL-NEXT: v_cndmask_b32_e32 v1, 0, v1, vcc +; VI-GISEL-NEXT: v_ldexp_f32 v0, v0, v1 +; VI-GISEL-NEXT: s_setpc_b64 s[30:31] +; ; GFX900-SDAG-LABEL: v_exp2_fabs_f32: ; GFX900-SDAG: ; %bb.0: ; GFX900-SDAG-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) @@ -1118,6 +1160,20 @@ define float @v_exp2_fabs_f32(float %in) { ; GFX900-SDAG-NEXT: v_ldexp_f32 v0, v0, v1 ; GFX900-SDAG-NEXT: s_setpc_b64 s[30:31] ; +; GFX900-GISEL-LABEL: v_exp2_fabs_f32: +; GFX900-GISEL: ; %bb.0: +; GFX900-GISEL-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX900-GISEL-NEXT: v_mov_b32_e32 v1, 0xc2fc0000 +; GFX900-GISEL-NEXT: v_mov_b32_e32 v2, 0x42800000 +; GFX900-GISEL-NEXT: v_cmp_lt_f32_e64 vcc, |v0|, v1 +; GFX900-GISEL-NEXT: v_cndmask_b32_e32 v1, 0, v2, vcc +; GFX900-GISEL-NEXT: v_add_f32_e64 v0, |v0|, v1 +; GFX900-GISEL-NEXT: v_exp_f32_e32 v0, v0 +; GFX900-GISEL-NEXT: v_not_b32_e32 v1, 63 +; GFX900-GISEL-NEXT: v_cndmask_b32_e32 v1, 0, v1, vcc +; GFX900-GISEL-NEXT: v_ldexp_f32 v0, v0, v1 +; GFX900-GISEL-NEXT: s_setpc_b64 s[30:31] +; ; R600-LABEL: v_exp2_fabs_f32: ; R600: ; %bb.0: ; R600-NEXT: CF_END @@ -1147,19 +1203,19 @@ define float @v_exp2_fneg_fabs_f32(float %in) { ; SI-SDAG-NEXT: v_ldexp_f32_e32 v0, v0, v1 ; SI-SDAG-NEXT: 
s_setpc_b64 s[30:31] ; -; GCN-GISEL-LABEL: v_exp2_fneg_fabs_f32: -; GCN-GISEL: ; %bb.0: -; GCN-GISEL-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GCN-GISEL-NEXT: v_mov_b32_e32 v1, 0xc2fc0000 -; GCN-GISEL-NEXT: v_mov_b32_e32 v2, 0x42800000 -; GCN-GISEL-NEXT: v_cmp_lt_f32_e64 vcc, -|v0|, v1 -; GCN-GISEL-NEXT: v_cndmask_b32_e32 v1, 0, v2, vcc -; GCN-GISEL-NEXT: v_sub_f32_e64 v0, v1, |v0| -; GCN-GISEL-NEXT: v_exp_f32_e32 v0, v0 -; GCN-GISEL-NEXT: v_mov_b32_e32 v1, 0x1f800000 -; GCN-GISEL-NEXT: v_cndmask_b32_e32 v1, 1.0, v1, vcc -; GCN-GISEL-NEXT: v_mul_f32_e32 v0, v0, v1 -; GCN-GISEL-NEXT: s_setpc_b64 s[30:31] +; SI-GISEL-LABEL: v_exp2_fneg_fabs_f32: +; SI-GISEL: ; %bb.0: +; SI-GISEL-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; SI-GISEL-NEXT: v_mov_b32_e32 v1, 0xc2fc0000 +; SI-GISEL-NEXT: v_mov_b32_e32 v2, 0x42800000 +; SI-GISEL-NEXT: v_cmp_lt_f32_e64 vcc, -|v0|, v1 +; SI-GISEL-NEXT: v_cndmask_b32_e32 v1, 0, v2, vcc +; SI-GISEL-NEXT: v_sub_f32_e64 v0, v1, |v0| +; SI-GISEL-NEXT: v_exp_f32_e32 v0, v0 +; SI-GISEL-NEXT: v_not_b32_e32 v1, 63 +; SI-GISEL-NEXT: v_cndmask_b32_e32 v1, 0, v1, vcc +; SI-GISEL-NEXT: v_ldexp_f32_e32 v0, v0, v1 +; SI-GISEL-NEXT: s_setpc_b64 s[30:31] ; ; VI-SDAG-LABEL: v_exp2_fneg_fabs_f32: ; VI-SDAG: ; %bb.0: @@ -1175,6 +1231,20 @@ define float @v_exp2_fneg_fabs_f32(float %in) { ; VI-SDAG-NEXT: v_ldexp_f32 v0, v0, v1 ; VI-SDAG-NEXT: s_setpc_b64 s[30:31] ; +; VI-GISEL-LABEL: v_exp2_fneg_fabs_f32: +; VI-GISEL: ; %bb.0: +; VI-GISEL-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; VI-GISEL-NEXT: v_mov_b32_e32 v1, 0xc2fc0000 +; VI-GISEL-NEXT: v_mov_b32_e32 v2, 0x42800000 +; VI-GISEL-NEXT: v_cmp_lt_f32_e64 vcc, -|v0|, v1 +; VI-GISEL-NEXT: v_cndmask_b32_e32 v1, 0, v2, vcc +; VI-GISEL-NEXT: v_sub_f32_e64 v0, v1, |v0| +; VI-GISEL-NEXT: v_exp_f32_e32 v0, v0 +; VI-GISEL-NEXT: v_not_b32_e32 v1, 63 +; VI-GISEL-NEXT: v_cndmask_b32_e32 v1, 0, v1, vcc +; VI-GISEL-NEXT: v_ldexp_f32 v0, v0, v1 +; VI-GISEL-NEXT: s_setpc_b64 s[30:31] +; ; GFX900-SDAG-LABEL: 
v_exp2_fneg_fabs_f32: ; GFX900-SDAG: ; %bb.0: ; GFX900-SDAG-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) @@ -1189,6 +1259,20 @@ define float @v_exp2_fneg_fabs_f32(float %in) { ; GFX900-SDAG-NEXT: v_ldexp_f32 v0, v0, v1 ; GFX900-SDAG-NEXT: s_setpc_b64 s[30:31] ; +; GFX900-GISEL-LABEL: v_exp2_fneg_fabs_f32: +; GFX900-GISEL: ; %bb.0: +; GFX900-GISEL-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX900-GISEL-NEXT: v_mov_b32_e32 v1, 0xc2fc0000 +; GFX900-GISEL-NEXT: v_mov_b32_e32 v2, 0x42800000 +; GFX900-GISEL-NEXT: v_cmp_lt_f32_e64 vcc, -|v0|, v1 +; GFX900-GISEL-NEXT: v_cndmask_b32_e32 v1, 0, v2, vcc +; GFX900-GISEL-NEXT: v_sub_f32_e64 v0, v1, |v0| +; GFX900-GISEL-NEXT: v_exp_f32_e32 v0, v0 +; GFX900-GISEL-NEXT: v_not_b32_e32 v1, 63 +; GFX900-GISEL-NEXT: v_cndmask_b32_e32 v1, 0, v1, vcc +; GFX900-GISEL-NEXT: v_ldexp_f32 v0, v0, v1 +; GFX900-GISEL-NEXT: s_setpc_b64 s[30:31] +; ; R600-LABEL: v_exp2_fneg_fabs_f32: ; R600: ; %bb.0: ; R600-NEXT: CF_END @@ -1219,19 +1303,19 @@ define float @v_exp2_fneg_f32(float %in) { ; SI-SDAG-NEXT: v_ldexp_f32_e32 v0, v0, v1 ; SI-SDAG-NEXT: s_setpc_b64 s[30:31] ; -; GCN-GISEL-LABEL: v_exp2_fneg_f32: -; GCN-GISEL: ; %bb.0: -; GCN-GISEL-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GCN-GISEL-NEXT: v_mov_b32_e32 v1, 0xc2fc0000 -; GCN-GISEL-NEXT: v_mov_b32_e32 v2, 0x42800000 -; GCN-GISEL-NEXT: v_cmp_lt_f32_e64 vcc, -v0, v1 -; GCN-GISEL-NEXT: v_cndmask_b32_e32 v1, 0, v2, vcc -; GCN-GISEL-NEXT: v_sub_f32_e32 v0, v1, v0 -; GCN-GISEL-NEXT: v_exp_f32_e32 v0, v0 -; GCN-GISEL-NEXT: v_mov_b32_e32 v1, 0x1f800000 -; GCN-GISEL-NEXT: v_cndmask_b32_e32 v1, 1.0, v1, vcc -; GCN-GISEL-NEXT: v_mul_f32_e32 v0, v0, v1 -; GCN-GISEL-NEXT: s_setpc_b64 s[30:31] +; SI-GISEL-LABEL: v_exp2_fneg_f32: +; SI-GISEL: ; %bb.0: +; SI-GISEL-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; SI-GISEL-NEXT: v_mov_b32_e32 v1, 0xc2fc0000 +; SI-GISEL-NEXT: v_mov_b32_e32 v2, 0x42800000 +; SI-GISEL-NEXT: v_cmp_lt_f32_e64 vcc, -v0, v1 +; SI-GISEL-NEXT: v_cndmask_b32_e32 v1, 0, 
v2, vcc +; SI-GISEL-NEXT: v_sub_f32_e32 v0, v1, v0 +; SI-GISEL-NEXT: v_exp_f32_e32 v0, v0 +; SI-GISEL-NEXT: v_not_b32_e32 v1, 63 +; SI-GISEL-NEXT: v_cndmask_b32_e32 v1, 0, v1, vcc +; SI-GISEL-NEXT: v_ldexp_f32_e32 v0, v0, v1 +; SI-GISEL-NEXT: s_setpc_b64 s[30:31] ; ; VI-SDAG-LABEL: v_exp2_fneg_f32: ; VI-SDAG: ; %bb.0: @@ -1247,6 +1331,20 @@ define float @v_exp2_fneg_f32(float %in) { ; VI-SDAG-NEXT: v_ldexp_f32 v0, v0, v1 ; VI-SDAG-NEXT: s_setpc_b64 s[30:31] ; +; VI-GISEL-LABEL: v_exp2_fneg_f32: +; VI-GISEL: ; %bb.0: +; VI-GISEL-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; VI-GISEL-NEXT: v_mov_b32_e32 v1, 0xc2fc0000 +; VI-GISEL-NEXT: v_mov_b32_e32 v2, 0x42800000 +; VI-GISEL-NEXT: v_cmp_lt_f32_e64 vcc, -v0, v1 +; VI-GISEL-NEXT: v_cndmask_b32_e32 v1, 0, v2, vcc +; VI-GISEL-NEXT: v_sub_f32_e32 v0, v1, v0 +; VI-GISEL-NEXT: v_exp_f32_e32 v0, v0 +; VI-GISEL-NEXT: v_not_b32_e32 v1, 63 +; VI-GISEL-NEXT: v_cndmask_b32_e32 v1, 0, v1, vcc +; VI-GISEL-NEXT: v_ldexp_f32 v0, v0, v1 +; VI-GISEL-NEXT: s_setpc_b64 s[30:31] +; ; GFX900-SDAG-LABEL: v_exp2_fneg_f32: ; GFX900-SDAG: ; %bb.0: ; GFX900-SDAG-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) @@ -1261,6 +1359,20 @@ define float @v_exp2_fneg_f32(float %in) { ; GFX900-SDAG-NEXT: v_ldexp_f32 v0, v0, v1 ; GFX900-SDAG-NEXT: s_setpc_b64 s[30:31] ; +; GFX900-GISEL-LABEL: v_exp2_fneg_f32: +; GFX900-GISEL: ; %bb.0: +; GFX900-GISEL-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX900-GISEL-NEXT: v_mov_b32_e32 v1, 0xc2fc0000 +; GFX900-GISEL-NEXT: v_mov_b32_e32 v2, 0x42800000 +; GFX900-GISEL-NEXT: v_cmp_lt_f32_e64 vcc, -v0, v1 +; GFX900-GISEL-NEXT: v_cndmask_b32_e32 v1, 0, v2, vcc +; GFX900-GISEL-NEXT: v_sub_f32_e32 v0, v1, v0 +; GFX900-GISEL-NEXT: v_exp_f32_e32 v0, v0 +; GFX900-GISEL-NEXT: v_not_b32_e32 v1, 63 +; GFX900-GISEL-NEXT: v_cndmask_b32_e32 v1, 0, v1, vcc +; GFX900-GISEL-NEXT: v_ldexp_f32 v0, v0, v1 +; GFX900-GISEL-NEXT: s_setpc_b64 s[30:31] +; ; R600-LABEL: v_exp2_fneg_f32: ; R600: ; %bb.0: ; R600-NEXT: CF_END @@ -1290,19 
+1402,19 @@ define float @v_exp2_f32_fast(float %in) { ; SI-SDAG-NEXT: v_ldexp_f32_e32 v0, v0, v1 ; SI-SDAG-NEXT: s_setpc_b64 s[30:31] ; -; GCN-GISEL-LABEL: v_exp2_f32_fast: -; GCN-GISEL: ; %bb.0: -; GCN-GISEL-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GCN-GISEL-NEXT: v_mov_b32_e32 v1, 0xc2fc0000 -; GCN-GISEL-NEXT: v_mov_b32_e32 v2, 0x42800000 -; GCN-GISEL-NEXT: v_cmp_lt_f32_e32 vcc, v0, v1 -; GCN-GISEL-NEXT: v_cndmask_b32_e32 v1, 0, v2, vcc -; GCN-GISEL-NEXT: v_add_f32_e32 v0, v0, v1 -; GCN-GISEL-NEXT: v_exp_f32_e32 v0, v0 -; GCN-GISEL-NEXT: v_mov_b32_e32 v1, 0x1f800000 -; GCN-GISEL-NEXT: v_cndmask_b32_e32 v1, 1.0, v1, vcc -; GCN-GISEL-NEXT: v_mul_f32_e32 v0, v0, v1 -; GCN-GISEL-NEXT: s_setpc_b64 s[30:31] +; SI-GISEL-LABEL: v_exp2_f32_fast: +; SI-GISEL: ; %bb.0: +; SI-GISEL-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; SI-GISEL-NEXT: v_mov_b32_e32 v1, 0xc2fc0000 +; SI-GISEL-NEXT: v_mov_b32_e32 v2, 0x42800000 +; SI-GISEL-NEXT: v_cmp_lt_f32_e32 vcc, v0, v1 +; SI-GISEL-NEXT: v_cndmask_b32_e32 v1, 0, v2, vcc +; SI-GISEL-NEXT: v_add_f32_e32 v0, v0, v1 +; SI-GISEL-NEXT: v_exp_f32_e32 v0, v0 +; SI-GISEL-NEXT: v_not_b32_e32 v1, 63 +; SI-GISEL-NEXT: v_cndmask_b32_e32 v1, 0, v1, vcc +; SI-GISEL-NEXT: v_ldexp_f32_e32 v0, v0, v1 +; SI-GISEL-NEXT: s_setpc_b64 s[30:31] ; ; VI-SDAG-LABEL: v_exp2_f32_fast: ; VI-SDAG: ; %bb.0: @@ -1318,6 +1430,20 @@ define float @v_exp2_f32_fast(float %in) { ; VI-SDAG-NEXT: v_ldexp_f32 v0, v0, v1 ; VI-SDAG-NEXT: s_setpc_b64 s[30:31] ; +; VI-GISEL-LABEL: v_exp2_f32_fast: +; VI-GISEL: ; %bb.0: +; VI-GISEL-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; VI-GISEL-NEXT: v_mov_b32_e32 v1, 0xc2fc0000 +; VI-GISEL-NEXT: v_mov_b32_e32 v2, 0x42800000 +; VI-GISEL-NEXT: v_cmp_lt_f32_e32 vcc, v0, v1 +; VI-GISEL-NEXT: v_cndmask_b32_e32 v1, 0, v2, vcc +; VI-GISEL-NEXT: v_add_f32_e32 v0, v0, v1 +; VI-GISEL-NEXT: v_exp_f32_e32 v0, v0 +; VI-GISEL-NEXT: v_not_b32_e32 v1, 63 +; VI-GISEL-NEXT: v_cndmask_b32_e32 v1, 0, v1, vcc +; VI-GISEL-NEXT: v_ldexp_f32 v0, 
v0, v1 +; VI-GISEL-NEXT: s_setpc_b64 s[30:31] +; ; GFX900-SDAG-LABEL: v_exp2_f32_fast: ; GFX900-SDAG: ; %bb.0: ; GFX900-SDAG-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) @@ -1332,6 +1458,20 @@ define float @v_exp2_f32_fast(float %in) { ; GFX900-SDAG-NEXT: v_ldexp_f32 v0, v0, v1 ; GFX900-SDAG-NEXT: s_setpc_b64 s[30:31] ; +; GFX900-GISEL-LABEL: v_exp2_f32_fast: +; GFX900-GISEL: ; %bb.0: +; GFX900-GISEL-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX900-GISEL-NEXT: v_mov_b32_e32 v1, 0xc2fc0000 +; GFX900-GISEL-NEXT: v_mov_b32_e32 v2, 0x42800000 +; GFX900-GISEL-NEXT: v_cmp_lt_f32_e32 vcc, v0, v1 +; GFX900-GISEL-NEXT: v_cndmask_b32_e32 v1, 0, v2, vcc +; GFX900-GISEL-NEXT: v_add_f32_e32 v0, v0, v1 +; GFX900-GISEL-NEXT: v_exp_f32_e32 v0, v0 +; GFX900-GISEL-NEXT: v_not_b32_e32 v1, 63 +; GFX900-GISEL-NEXT: v_cndmask_b32_e32 v1, 0, v1, vcc +; GFX900-GISEL-NEXT: v_ldexp_f32 v0, v0, v1 +; GFX900-GISEL-NEXT: s_setpc_b64 s[30:31] +; ; R600-LABEL: v_exp2_f32_fast: ; R600: ; %bb.0: ; R600-NEXT: CF_END @@ -1360,19 +1500,19 @@ define float @v_exp2_f32_unsafe_math_attr(float %in) "unsafe-fp-math"="true" { ; SI-SDAG-NEXT: v_ldexp_f32_e32 v0, v0, v1 ; SI-SDAG-NEXT: s_setpc_b64 s[30:31] ; -; GCN-GISEL-LABEL: v_exp2_f32_unsafe_math_attr: -; GCN-GISEL: ; %bb.0: -; GCN-GISEL-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GCN-GISEL-NEXT: v_mov_b32_e32 v1, 0xc2fc0000 -; GCN-GISEL-NEXT: v_mov_b32_e32 v2, 0x42800000 -; GCN-GISEL-NEXT: v_cmp_lt_f32_e32 vcc, v0, v1 -; GCN-GISEL-NEXT: v_cndmask_b32_e32 v1, 0, v2, vcc -; GCN-GISEL-NEXT: v_add_f32_e32 v0, v0, v1 -; GCN-GISEL-NEXT: v_exp_f32_e32 v0, v0 -; GCN-GISEL-NEXT: v_mov_b32_e32 v1, 0x1f800000 -; GCN-GISEL-NEXT: v_cndmask_b32_e32 v1, 1.0, v1, vcc -; GCN-GISEL-NEXT: v_mul_f32_e32 v0, v0, v1 -; GCN-GISEL-NEXT: s_setpc_b64 s[30:31] +; SI-GISEL-LABEL: v_exp2_f32_unsafe_math_attr: +; SI-GISEL: ; %bb.0: +; SI-GISEL-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; SI-GISEL-NEXT: v_mov_b32_e32 v1, 0xc2fc0000 +; SI-GISEL-NEXT: v_mov_b32_e32 v2, 
0x42800000 +; SI-GISEL-NEXT: v_cmp_lt_f32_e32 vcc, v0, v1 +; SI-GISEL-NEXT: v_cndmask_b32_e32 v1, 0, v2, vcc +; SI-GISEL-NEXT: v_add_f32_e32 v0, v0, v1 +; SI-GISEL-NEXT: v_exp_f32_e32 v0, v0 +; SI-GISEL-NEXT: v_not_b32_e32 v1, 63 +; SI-GISEL-NEXT: v_cndmask_b32_e32 v1, 0, v1, vcc +; SI-GISEL-NEXT: v_ldexp_f32_e32 v0, v0, v1 +; SI-GISEL-NEXT: s_setpc_b64 s[30:31] ; ; VI-SDAG-LABEL: v_exp2_f32_unsafe_math_attr: ; VI-SDAG: ; %bb.0: @@ -1388,6 +1528,20 @@ define float @v_exp2_f32_unsafe_math_attr(float %in) "unsafe-fp-math"="true" { ; VI-SDAG-NEXT: v_ldexp_f32 v0, v0, v1 ; VI-SDAG-NEXT: s_setpc_b64 s[30:31] ; +; VI-GISEL-LABEL: v_exp2_f32_unsafe_math_attr: +; VI-GISEL: ; %bb.0: +; VI-GISEL-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; VI-GISEL-NEXT: v_mov_b32_e32 v1, 0xc2fc0000 +; VI-GISEL-NEXT: v_mov_b32_e32 v2, 0x42800000 +; VI-GISEL-NEXT: v_cmp_lt_f32_e32 vcc, v0, v1 +; VI-GISEL-NEXT: v_cndmask_b32_e32 v1, 0, v2, vcc +; VI-GISEL-NEXT: v_add_f32_e32 v0, v0, v1 +; VI-GISEL-NEXT: v_exp_f32_e32 v0, v0 +; VI-GISEL-NEXT: v_not_b32_e32 v1, 63 +; VI-GISEL-NEXT: v_cndmask_b32_e32 v1, 0, v1, vcc +; VI-GISEL-NEXT: v_ldexp_f32 v0, v0, v1 +; VI-GISEL-NEXT: s_setpc_b64 s[30:31] +; ; GFX900-SDAG-LABEL: v_exp2_f32_unsafe_math_attr: ; GFX900-SDAG: ; %bb.0: ; GFX900-SDAG-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) @@ -1402,6 +1556,20 @@ define float @v_exp2_f32_unsafe_math_attr(float %in) "unsafe-fp-math"="true" { ; GFX900-SDAG-NEXT: v_ldexp_f32 v0, v0, v1 ; GFX900-SDAG-NEXT: s_setpc_b64 s[30:31] ; +; GFX900-GISEL-LABEL: v_exp2_f32_unsafe_math_attr: +; GFX900-GISEL: ; %bb.0: +; GFX900-GISEL-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX900-GISEL-NEXT: v_mov_b32_e32 v1, 0xc2fc0000 +; GFX900-GISEL-NEXT: v_mov_b32_e32 v2, 0x42800000 +; GFX900-GISEL-NEXT: v_cmp_lt_f32_e32 vcc, v0, v1 +; GFX900-GISEL-NEXT: v_cndmask_b32_e32 v1, 0, v2, vcc +; GFX900-GISEL-NEXT: v_add_f32_e32 v0, v0, v1 +; GFX900-GISEL-NEXT: v_exp_f32_e32 v0, v0 +; GFX900-GISEL-NEXT: v_not_b32_e32 v1, 63 +; 
GFX900-GISEL-NEXT: v_cndmask_b32_e32 v1, 0, v1, vcc +; GFX900-GISEL-NEXT: v_ldexp_f32 v0, v0, v1 +; GFX900-GISEL-NEXT: s_setpc_b64 s[30:31] +; ; R600-LABEL: v_exp2_f32_unsafe_math_attr: ; R600: ; %bb.0: ; R600-NEXT: CF_END @@ -1430,19 +1598,19 @@ define float @v_exp2_f32_approx_fn_attr(float %in) "approx-func-fp-math"="true" ; SI-SDAG-NEXT: v_ldexp_f32_e32 v0, v0, v1 ; SI-SDAG-NEXT: s_setpc_b64 s[30:31] ; -; GCN-GISEL-LABEL: v_exp2_f32_approx_fn_attr: -; GCN-GISEL: ; %bb.0: -; GCN-GISEL-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GCN-GISEL-NEXT: v_mov_b32_e32 v1, 0xc2fc0000 -; GCN-GISEL-NEXT: v_mov_b32_e32 v2, 0x42800000 -; GCN-GISEL-NEXT: v_cmp_lt_f32_e32 vcc, v0, v1 -; GCN-GISEL-NEXT: v_cndmask_b32_e32 v1, 0, v2, vcc -; GCN-GISEL-NEXT: v_add_f32_e32 v0, v0, v1 -; GCN-GISEL-NEXT: v_exp_f32_e32 v0, v0 -; GCN-GISEL-NEXT: v_mov_b32_e32 v1, 0x1f800000 -; GCN-GISEL-NEXT: v_cndmask_b32_e32 v1, 1.0, v1, vcc -; GCN-GISEL-NEXT: v_mul_f32_e32 v0, v0, v1 -; GCN-GISEL-NEXT: s_setpc_b64 s[30:31] +; SI-GISEL-LABEL: v_exp2_f32_approx_fn_attr: +; SI-GISEL: ; %bb.0: +; SI-GISEL-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; SI-GISEL-NEXT: v_mov_b32_e32 v1, 0xc2fc0000 +; SI-GISEL-NEXT: v_mov_b32_e32 v2, 0x42800000 +; SI-GISEL-NEXT: v_cmp_lt_f32_e32 vcc, v0, v1 +; SI-GISEL-NEXT: v_cndmask_b32_e32 v1, 0, v2, vcc +; SI-GISEL-NEXT: v_add_f32_e32 v0, v0, v1 +; SI-GISEL-NEXT: v_exp_f32_e32 v0, v0 +; SI-GISEL-NEXT: v_not_b32_e32 v1, 63 +; SI-GISEL-NEXT: v_cndmask_b32_e32 v1, 0, v1, vcc +; SI-GISEL-NEXT: v_ldexp_f32_e32 v0, v0, v1 +; SI-GISEL-NEXT: s_setpc_b64 s[30:31] ; ; VI-SDAG-LABEL: v_exp2_f32_approx_fn_attr: ; VI-SDAG: ; %bb.0: @@ -1458,6 +1626,20 @@ define float @v_exp2_f32_approx_fn_attr(float %in) "approx-func-fp-math"="true" ; VI-SDAG-NEXT: v_ldexp_f32 v0, v0, v1 ; VI-SDAG-NEXT: s_setpc_b64 s[30:31] ; +; VI-GISEL-LABEL: v_exp2_f32_approx_fn_attr: +; VI-GISEL: ; %bb.0: +; VI-GISEL-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; VI-GISEL-NEXT: v_mov_b32_e32 v1, 0xc2fc0000 
+; VI-GISEL-NEXT: v_mov_b32_e32 v2, 0x42800000 +; VI-GISEL-NEXT: v_cmp_lt_f32_e32 vcc, v0, v1 +; VI-GISEL-NEXT: v_cndmask_b32_e32 v1, 0, v2, vcc +; VI-GISEL-NEXT: v_add_f32_e32 v0, v0, v1 +; VI-GISEL-NEXT: v_exp_f32_e32 v0, v0 +; VI-GISEL-NEXT: v_not_b32_e32 v1, 63 +; VI-GISEL-NEXT: v_cndmask_b32_e32 v1, 0, v1, vcc +; VI-GISEL-NEXT: v_ldexp_f32 v0, v0, v1 +; VI-GISEL-NEXT: s_setpc_b64 s[30:31] +; ; GFX900-SDAG-LABEL: v_exp2_f32_approx_fn_attr: ; GFX900-SDAG: ; %bb.0: ; GFX900-SDAG-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) @@ -1472,6 +1654,20 @@ define float @v_exp2_f32_approx_fn_attr(float %in) "approx-func-fp-math"="true" ; GFX900-SDAG-NEXT: v_ldexp_f32 v0, v0, v1 ; GFX900-SDAG-NEXT: s_setpc_b64 s[30:31] ; +; GFX900-GISEL-LABEL: v_exp2_f32_approx_fn_attr: +; GFX900-GISEL: ; %bb.0: +; GFX900-GISEL-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX900-GISEL-NEXT: v_mov_b32_e32 v1, 0xc2fc0000 +; GFX900-GISEL-NEXT: v_mov_b32_e32 v2, 0x42800000 +; GFX900-GISEL-NEXT: v_cmp_lt_f32_e32 vcc, v0, v1 +; GFX900-GISEL-NEXT: v_cndmask_b32_e32 v1, 0, v2, vcc +; GFX900-GISEL-NEXT: v_add_f32_e32 v0, v0, v1 +; GFX900-GISEL-NEXT: v_exp_f32_e32 v0, v0 +; GFX900-GISEL-NEXT: v_not_b32_e32 v1, 63 +; GFX900-GISEL-NEXT: v_cndmask_b32_e32 v1, 0, v1, vcc +; GFX900-GISEL-NEXT: v_ldexp_f32 v0, v0, v1 +; GFX900-GISEL-NEXT: s_setpc_b64 s[30:31] +; ; R600-LABEL: v_exp2_f32_approx_fn_attr: ; R600: ; %bb.0: ; R600-NEXT: CF_END @@ -1500,19 +1696,19 @@ define float @v_exp2_f32_ninf(float %in) { ; SI-SDAG-NEXT: v_ldexp_f32_e32 v0, v0, v1 ; SI-SDAG-NEXT: s_setpc_b64 s[30:31] ; -; GCN-GISEL-LABEL: v_exp2_f32_ninf: -; GCN-GISEL: ; %bb.0: -; GCN-GISEL-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GCN-GISEL-NEXT: v_mov_b32_e32 v1, 0xc2fc0000 -; GCN-GISEL-NEXT: v_mov_b32_e32 v2, 0x42800000 -; GCN-GISEL-NEXT: v_cmp_lt_f32_e32 vcc, v0, v1 -; GCN-GISEL-NEXT: v_cndmask_b32_e32 v1, 0, v2, vcc -; GCN-GISEL-NEXT: v_add_f32_e32 v0, v0, v1 -; GCN-GISEL-NEXT: v_exp_f32_e32 v0, v0 -; GCN-GISEL-NEXT: 
v_mov_b32_e32 v1, 0x1f800000 -; GCN-GISEL-NEXT: v_cndmask_b32_e32 v1, 1.0, v1, vcc -; GCN-GISEL-NEXT: v_mul_f32_e32 v0, v0, v1 -; GCN-GISEL-NEXT: s_setpc_b64 s[30:31] +; SI-GISEL-LABEL: v_exp2_f32_ninf: +; SI-GISEL: ; %bb.0: +; SI-GISEL-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; SI-GISEL-NEXT: v_mov_b32_e32 v1, 0xc2fc0000 +; SI-GISEL-NEXT: v_mov_b32_e32 v2, 0x42800000 +; SI-GISEL-NEXT: v_cmp_lt_f32_e32 vcc, v0, v1 +; SI-GISEL-NEXT: v_cndmask_b32_e32 v1, 0, v2, vcc +; SI-GISEL-NEXT: v_add_f32_e32 v0, v0, v1 +; SI-GISEL-NEXT: v_exp_f32_e32 v0, v0 +; SI-GISEL-NEXT: v_not_b32_e32 v1, 63 +; SI-GISEL-NEXT: v_cndmask_b32_e32 v1, 0, v1, vcc +; SI-GISEL-NEXT: v_ldexp_f32_e32 v0, v0, v1 +; SI-GISEL-NEXT: s_setpc_b64 s[30:31] ; ; VI-SDAG-LABEL: v_exp2_f32_ninf: ; VI-SDAG: ; %bb.0: @@ -1528,6 +1724,20 @@ define float @v_exp2_f32_ninf(float %in) { ; VI-SDAG-NEXT: v_ldexp_f32 v0, v0, v1 ; VI-SDAG-NEXT: s_setpc_b64 s[30:31] ; +; VI-GISEL-LABEL: v_exp2_f32_ninf: +; VI-GISEL: ; %bb.0: +; VI-GISEL-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; VI-GISEL-NEXT: v_mov_b32_e32 v1, 0xc2fc0000 +; VI-GISEL-NEXT: v_mov_b32_e32 v2, 0x42800000 +; VI-GISEL-NEXT: v_cmp_lt_f32_e32 vcc, v0, v1 +; VI-GISEL-NEXT: v_cndmask_b32_e32 v1, 0, v2, vcc +; VI-GISEL-NEXT: v_add_f32_e32 v0, v0, v1 +; VI-GISEL-NEXT: v_exp_f32_e32 v0, v0 +; VI-GISEL-NEXT: v_not_b32_e32 v1, 63 +; VI-GISEL-NEXT: v_cndmask_b32_e32 v1, 0, v1, vcc +; VI-GISEL-NEXT: v_ldexp_f32 v0, v0, v1 +; VI-GISEL-NEXT: s_setpc_b64 s[30:31] +; ; GFX900-SDAG-LABEL: v_exp2_f32_ninf: ; GFX900-SDAG: ; %bb.0: ; GFX900-SDAG-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) @@ -1542,6 +1752,20 @@ define float @v_exp2_f32_ninf(float %in) { ; GFX900-SDAG-NEXT: v_ldexp_f32 v0, v0, v1 ; GFX900-SDAG-NEXT: s_setpc_b64 s[30:31] ; +; GFX900-GISEL-LABEL: v_exp2_f32_ninf: +; GFX900-GISEL: ; %bb.0: +; GFX900-GISEL-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX900-GISEL-NEXT: v_mov_b32_e32 v1, 0xc2fc0000 +; GFX900-GISEL-NEXT: v_mov_b32_e32 v2, 0x42800000 
+; GFX900-GISEL-NEXT: v_cmp_lt_f32_e32 vcc, v0, v1 +; GFX900-GISEL-NEXT: v_cndmask_b32_e32 v1, 0, v2, vcc +; GFX900-GISEL-NEXT: v_add_f32_e32 v0, v0, v1 +; GFX900-GISEL-NEXT: v_exp_f32_e32 v0, v0 +; GFX900-GISEL-NEXT: v_not_b32_e32 v1, 63 +; GFX900-GISEL-NEXT: v_cndmask_b32_e32 v1, 0, v1, vcc +; GFX900-GISEL-NEXT: v_ldexp_f32 v0, v0, v1 +; GFX900-GISEL-NEXT: s_setpc_b64 s[30:31] +; ; R600-LABEL: v_exp2_f32_ninf: ; R600: ; %bb.0: ; R600-NEXT: CF_END @@ -1570,19 +1794,19 @@ define float @v_exp2_f32_afn(float %in) { ; SI-SDAG-NEXT: v_ldexp_f32_e32 v0, v0, v1 ; SI-SDAG-NEXT: s_setpc_b64 s[30:31] ; -; GCN-GISEL-LABEL: v_exp2_f32_afn: -; GCN-GISEL: ; %bb.0: -; GCN-GISEL-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GCN-GISEL-NEXT: v_mov_b32_e32 v1, 0xc2fc0000 -; GCN-GISEL-NEXT: v_mov_b32_e32 v2, 0x42800000 -; GCN-GISEL-NEXT: v_cmp_lt_f32_e32 vcc, v0, v1 -; GCN-GISEL-NEXT: v_cndmask_b32_e32 v1, 0, v2, vcc -; GCN-GISEL-NEXT: v_add_f32_e32 v0, v0, v1 -; GCN-GISEL-NEXT: v_exp_f32_e32 v0, v0 -; GCN-GISEL-NEXT: v_mov_b32_e32 v1, 0x1f800000 -; GCN-GISEL-NEXT: v_cndmask_b32_e32 v1, 1.0, v1, vcc -; GCN-GISEL-NEXT: v_mul_f32_e32 v0, v0, v1 -; GCN-GISEL-NEXT: s_setpc_b64 s[30:31] +; SI-GISEL-LABEL: v_exp2_f32_afn: +; SI-GISEL: ; %bb.0: +; SI-GISEL-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; SI-GISEL-NEXT: v_mov_b32_e32 v1, 0xc2fc0000 +; SI-GISEL-NEXT: v_mov_b32_e32 v2, 0x42800000 +; SI-GISEL-NEXT: v_cmp_lt_f32_e32 vcc, v0, v1 +; SI-GISEL-NEXT: v_cndmask_b32_e32 v1, 0, v2, vcc +; SI-GISEL-NEXT: v_add_f32_e32 v0, v0, v1 +; SI-GISEL-NEXT: v_exp_f32_e32 v0, v0 +; SI-GISEL-NEXT: v_not_b32_e32 v1, 63 +; SI-GISEL-NEXT: v_cndmask_b32_e32 v1, 0, v1, vcc +; SI-GISEL-NEXT: v_ldexp_f32_e32 v0, v0, v1 +; SI-GISEL-NEXT: s_setpc_b64 s[30:31] ; ; VI-SDAG-LABEL: v_exp2_f32_afn: ; VI-SDAG: ; %bb.0: @@ -1598,6 +1822,20 @@ define float @v_exp2_f32_afn(float %in) { ; VI-SDAG-NEXT: v_ldexp_f32 v0, v0, v1 ; VI-SDAG-NEXT: s_setpc_b64 s[30:31] ; +; VI-GISEL-LABEL: v_exp2_f32_afn: +; VI-GISEL: ; 
%bb.0: +; VI-GISEL-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; VI-GISEL-NEXT: v_mov_b32_e32 v1, 0xc2fc0000 +; VI-GISEL-NEXT: v_mov_b32_e32 v2, 0x42800000 +; VI-GISEL-NEXT: v_cmp_lt_f32_e32 vcc, v0, v1 +; VI-GISEL-NEXT: v_cndmask_b32_e32 v1, 0, v2, vcc +; VI-GISEL-NEXT: v_add_f32_e32 v0, v0, v1 +; VI-GISEL-NEXT: v_exp_f32_e32 v0, v0 +; VI-GISEL-NEXT: v_not_b32_e32 v1, 63 +; VI-GISEL-NEXT: v_cndmask_b32_e32 v1, 0, v1, vcc +; VI-GISEL-NEXT: v_ldexp_f32 v0, v0, v1 +; VI-GISEL-NEXT: s_setpc_b64 s[30:31] +; ; GFX900-SDAG-LABEL: v_exp2_f32_afn: ; GFX900-SDAG: ; %bb.0: ; GFX900-SDAG-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) @@ -1612,6 +1850,20 @@ define float @v_exp2_f32_afn(float %in) { ; GFX900-SDAG-NEXT: v_ldexp_f32 v0, v0, v1 ; GFX900-SDAG-NEXT: s_setpc_b64 s[30:31] ; +; GFX900-GISEL-LABEL: v_exp2_f32_afn: +; GFX900-GISEL: ; %bb.0: +; GFX900-GISEL-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX900-GISEL-NEXT: v_mov_b32_e32 v1, 0xc2fc0000 +; GFX900-GISEL-NEXT: v_mov_b32_e32 v2, 0x42800000 +; GFX900-GISEL-NEXT: v_cmp_lt_f32_e32 vcc, v0, v1 +; GFX900-GISEL-NEXT: v_cndmask_b32_e32 v1, 0, v2, vcc +; GFX900-GISEL-NEXT: v_add_f32_e32 v0, v0, v1 +; GFX900-GISEL-NEXT: v_exp_f32_e32 v0, v0 +; GFX900-GISEL-NEXT: v_not_b32_e32 v1, 63 +; GFX900-GISEL-NEXT: v_cndmask_b32_e32 v1, 0, v1, vcc +; GFX900-GISEL-NEXT: v_ldexp_f32 v0, v0, v1 +; GFX900-GISEL-NEXT: s_setpc_b64 s[30:31] +; ; R600-LABEL: v_exp2_f32_afn: ; R600: ; %bb.0: ; R600-NEXT: CF_END @@ -1660,19 +1912,19 @@ define float @v_exp2_f32_afn_dynamic(float %in) #1 { ; SI-SDAG-NEXT: v_ldexp_f32_e32 v0, v0, v1 ; SI-SDAG-NEXT: s_setpc_b64 s[30:31] ; -; GCN-GISEL-LABEL: v_exp2_f32_afn_dynamic: -; GCN-GISEL: ; %bb.0: -; GCN-GISEL-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GCN-GISEL-NEXT: v_mov_b32_e32 v1, 0xc2fc0000 -; GCN-GISEL-NEXT: v_mov_b32_e32 v2, 0x42800000 -; GCN-GISEL-NEXT: v_cmp_lt_f32_e32 vcc, v0, v1 -; GCN-GISEL-NEXT: v_cndmask_b32_e32 v1, 0, v2, vcc -; GCN-GISEL-NEXT: v_add_f32_e32 v0, v0, v1 -; 
GCN-GISEL-NEXT: v_exp_f32_e32 v0, v0 -; GCN-GISEL-NEXT: v_mov_b32_e32 v1, 0x1f800000 -; GCN-GISEL-NEXT: v_cndmask_b32_e32 v1, 1.0, v1, vcc -; GCN-GISEL-NEXT: v_mul_f32_e32 v0, v0, v1 -; GCN-GISEL-NEXT: s_setpc_b64 s[30:31] +; SI-GISEL-LABEL: v_exp2_f32_afn_dynamic: +; SI-GISEL: ; %bb.0: +; SI-GISEL-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; SI-GISEL-NEXT: v_mov_b32_e32 v1, 0xc2fc0000 +; SI-GISEL-NEXT: v_mov_b32_e32 v2, 0x42800000 +; SI-GISEL-NEXT: v_cmp_lt_f32_e32 vcc, v0, v1 +; SI-GISEL-NEXT: v_cndmask_b32_e32 v1, 0, v2, vcc +; SI-GISEL-NEXT: v_add_f32_e32 v0, v0, v1 +; SI-GISEL-NEXT: v_exp_f32_e32 v0, v0 +; SI-GISEL-NEXT: v_not_b32_e32 v1, 63 +; SI-GISEL-NEXT: v_cndmask_b32_e32 v1, 0, v1, vcc +; SI-GISEL-NEXT: v_ldexp_f32_e32 v0, v0, v1 +; SI-GISEL-NEXT: s_setpc_b64 s[30:31] ; ; VI-SDAG-LABEL: v_exp2_f32_afn_dynamic: ; VI-SDAG: ; %bb.0: @@ -1688,6 +1940,20 @@ define float @v_exp2_f32_afn_dynamic(float %in) #1 { ; VI-SDAG-NEXT: v_ldexp_f32 v0, v0, v1 ; VI-SDAG-NEXT: s_setpc_b64 s[30:31] ; +; VI-GISEL-LABEL: v_exp2_f32_afn_dynamic: +; VI-GISEL: ; %bb.0: +; VI-GISEL-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; VI-GISEL-NEXT: v_mov_b32_e32 v1, 0xc2fc0000 +; VI-GISEL-NEXT: v_mov_b32_e32 v2, 0x42800000 +; VI-GISEL-NEXT: v_cmp_lt_f32_e32 vcc, v0, v1 +; VI-GISEL-NEXT: v_cndmask_b32_e32 v1, 0, v2, vcc +; VI-GISEL-NEXT: v_add_f32_e32 v0, v0, v1 +; VI-GISEL-NEXT: v_exp_f32_e32 v0, v0 +; VI-GISEL-NEXT: v_not_b32_e32 v1, 63 +; VI-GISEL-NEXT: v_cndmask_b32_e32 v1, 0, v1, vcc +; VI-GISEL-NEXT: v_ldexp_f32 v0, v0, v1 +; VI-GISEL-NEXT: s_setpc_b64 s[30:31] +; ; GFX900-SDAG-LABEL: v_exp2_f32_afn_dynamic: ; GFX900-SDAG: ; %bb.0: ; GFX900-SDAG-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) @@ -1702,6 +1968,20 @@ define float @v_exp2_f32_afn_dynamic(float %in) #1 { ; GFX900-SDAG-NEXT: v_ldexp_f32 v0, v0, v1 ; GFX900-SDAG-NEXT: s_setpc_b64 s[30:31] ; +; GFX900-GISEL-LABEL: v_exp2_f32_afn_dynamic: +; GFX900-GISEL: ; %bb.0: +; GFX900-GISEL-NEXT: s_waitcnt vmcnt(0) expcnt(0) 
lgkmcnt(0) +; GFX900-GISEL-NEXT: v_mov_b32_e32 v1, 0xc2fc0000 +; GFX900-GISEL-NEXT: v_mov_b32_e32 v2, 0x42800000 +; GFX900-GISEL-NEXT: v_cmp_lt_f32_e32 vcc, v0, v1 +; GFX900-GISEL-NEXT: v_cndmask_b32_e32 v1, 0, v2, vcc +; GFX900-GISEL-NEXT: v_add_f32_e32 v0, v0, v1 +; GFX900-GISEL-NEXT: v_exp_f32_e32 v0, v0 +; GFX900-GISEL-NEXT: v_not_b32_e32 v1, 63 +; GFX900-GISEL-NEXT: v_cndmask_b32_e32 v1, 0, v1, vcc +; GFX900-GISEL-NEXT: v_ldexp_f32 v0, v0, v1 +; GFX900-GISEL-NEXT: s_setpc_b64 s[30:31] +; ; R600-LABEL: v_exp2_f32_afn_dynamic: ; R600: ; %bb.0: ; R600-NEXT: CF_END @@ -1730,19 +2010,19 @@ define float @v_fabs_exp2_f32_afn(float %in) { ; SI-SDAG-NEXT: v_ldexp_f32_e32 v0, v0, v1 ; SI-SDAG-NEXT: s_setpc_b64 s[30:31] ; -; GCN-GISEL-LABEL: v_fabs_exp2_f32_afn: -; GCN-GISEL: ; %bb.0: -; GCN-GISEL-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GCN-GISEL-NEXT: v_mov_b32_e32 v1, 0xc2fc0000 -; GCN-GISEL-NEXT: v_mov_b32_e32 v2, 0x42800000 -; GCN-GISEL-NEXT: v_cmp_lt_f32_e64 vcc, |v0|, v1 -; GCN-GISEL-NEXT: v_cndmask_b32_e32 v1, 0, v2, vcc -; GCN-GISEL-NEXT: v_add_f32_e64 v0, |v0|, v1 -; GCN-GISEL-NEXT: v_exp_f32_e32 v0, v0 -; GCN-GISEL-NEXT: v_mov_b32_e32 v1, 0x1f800000 -; GCN-GISEL-NEXT: v_cndmask_b32_e32 v1, 1.0, v1, vcc -; GCN-GISEL-NEXT: v_mul_f32_e32 v0, v0, v1 -; GCN-GISEL-NEXT: s_setpc_b64 s[30:31] +; SI-GISEL-LABEL: v_fabs_exp2_f32_afn: +; SI-GISEL: ; %bb.0: +; SI-GISEL-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; SI-GISEL-NEXT: v_mov_b32_e32 v1, 0xc2fc0000 +; SI-GISEL-NEXT: v_mov_b32_e32 v2, 0x42800000 +; SI-GISEL-NEXT: v_cmp_lt_f32_e64 vcc, |v0|, v1 +; SI-GISEL-NEXT: v_cndmask_b32_e32 v1, 0, v2, vcc +; SI-GISEL-NEXT: v_add_f32_e64 v0, |v0|, v1 +; SI-GISEL-NEXT: v_exp_f32_e32 v0, v0 +; SI-GISEL-NEXT: v_not_b32_e32 v1, 63 +; SI-GISEL-NEXT: v_cndmask_b32_e32 v1, 0, v1, vcc +; SI-GISEL-NEXT: v_ldexp_f32_e32 v0, v0, v1 +; SI-GISEL-NEXT: s_setpc_b64 s[30:31] ; ; VI-SDAG-LABEL: v_fabs_exp2_f32_afn: ; VI-SDAG: ; %bb.0: @@ -1758,6 +2038,20 @@ define float 
@v_fabs_exp2_f32_afn(float %in) { ; VI-SDAG-NEXT: v_ldexp_f32 v0, v0, v1 ; VI-SDAG-NEXT: s_setpc_b64 s[30:31] ; +; VI-GISEL-LABEL: v_fabs_exp2_f32_afn: +; VI-GISEL: ; %bb.0: +; VI-GISEL-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; VI-GISEL-NEXT: v_mov_b32_e32 v1, 0xc2fc0000 +; VI-GISEL-NEXT: v_mov_b32_e32 v2, 0x42800000 +; VI-GISEL-NEXT: v_cmp_lt_f32_e64 vcc, |v0|, v1 +; VI-GISEL-NEXT: v_cndmask_b32_e32 v1, 0, v2, vcc +; VI-GISEL-NEXT: v_add_f32_e64 v0, |v0|, v1 +; VI-GISEL-NEXT: v_exp_f32_e32 v0, v0 +; VI-GISEL-NEXT: v_not_b32_e32 v1, 63 +; VI-GISEL-NEXT: v_cndmask_b32_e32 v1, 0, v1, vcc +; VI-GISEL-NEXT: v_ldexp_f32 v0, v0, v1 +; VI-GISEL-NEXT: s_setpc_b64 s[30:31] +; ; GFX900-SDAG-LABEL: v_fabs_exp2_f32_afn: ; GFX900-SDAG: ; %bb.0: ; GFX900-SDAG-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) @@ -1772,6 +2066,20 @@ define float @v_fabs_exp2_f32_afn(float %in) { ; GFX900-SDAG-NEXT: v_ldexp_f32 v0, v0, v1 ; GFX900-SDAG-NEXT: s_setpc_b64 s[30:31] ; +; GFX900-GISEL-LABEL: v_fabs_exp2_f32_afn: +; GFX900-GISEL: ; %bb.0: +; GFX900-GISEL-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX900-GISEL-NEXT: v_mov_b32_e32 v1, 0xc2fc0000 +; GFX900-GISEL-NEXT: v_mov_b32_e32 v2, 0x42800000 +; GFX900-GISEL-NEXT: v_cmp_lt_f32_e64 vcc, |v0|, v1 +; GFX900-GISEL-NEXT: v_cndmask_b32_e32 v1, 0, v2, vcc +; GFX900-GISEL-NEXT: v_add_f32_e64 v0, |v0|, v1 +; GFX900-GISEL-NEXT: v_exp_f32_e32 v0, v0 +; GFX900-GISEL-NEXT: v_not_b32_e32 v1, 63 +; GFX900-GISEL-NEXT: v_cndmask_b32_e32 v1, 0, v1, vcc +; GFX900-GISEL-NEXT: v_ldexp_f32 v0, v0, v1 +; GFX900-GISEL-NEXT: s_setpc_b64 s[30:31] +; ; R600-LABEL: v_fabs_exp2_f32_afn: ; R600: ; %bb.0: ; R600-NEXT: CF_END @@ -1821,19 +2129,19 @@ define float @v_exp2_f32_nnan(float %in) { ; SI-SDAG-NEXT: v_ldexp_f32_e32 v0, v0, v1 ; SI-SDAG-NEXT: s_setpc_b64 s[30:31] ; -; GCN-GISEL-LABEL: v_exp2_f32_nnan: -; GCN-GISEL: ; %bb.0: -; GCN-GISEL-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GCN-GISEL-NEXT: v_mov_b32_e32 v1, 0xc2fc0000 -; GCN-GISEL-NEXT: 
v_mov_b32_e32 v2, 0x42800000 -; GCN-GISEL-NEXT: v_cmp_lt_f32_e32 vcc, v0, v1 -; GCN-GISEL-NEXT: v_cndmask_b32_e32 v1, 0, v2, vcc -; GCN-GISEL-NEXT: v_add_f32_e32 v0, v0, v1 -; GCN-GISEL-NEXT: v_exp_f32_e32 v0, v0 -; GCN-GISEL-NEXT: v_mov_b32_e32 v1, 0x1f800000 -; GCN-GISEL-NEXT: v_cndmask_b32_e32 v1, 1.0, v1, vcc -; GCN-GISEL-NEXT: v_mul_f32_e32 v0, v0, v1 -; GCN-GISEL-NEXT: s_setpc_b64 s[30:31] +; SI-GISEL-LABEL: v_exp2_f32_nnan: +; SI-GISEL: ; %bb.0: +; SI-GISEL-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; SI-GISEL-NEXT: v_mov_b32_e32 v1, 0xc2fc0000 +; SI-GISEL-NEXT: v_mov_b32_e32 v2, 0x42800000 +; SI-GISEL-NEXT: v_cmp_lt_f32_e32 vcc, v0, v1 +; SI-GISEL-NEXT: v_cndmask_b32_e32 v1, 0, v2, vcc +; SI-GISEL-NEXT: v_add_f32_e32 v0, v0, v1 +; SI-GISEL-NEXT: v_exp_f32_e32 v0, v0 +; SI-GISEL-NEXT: v_not_b32_e32 v1, 63 +; SI-GISEL-NEXT: v_cndmask_b32_e32 v1, 0, v1, vcc +; SI-GISEL-NEXT: v_ldexp_f32_e32 v0, v0, v1 +; SI-GISEL-NEXT: s_setpc_b64 s[30:31] ; ; VI-SDAG-LABEL: v_exp2_f32_nnan: ; VI-SDAG: ; %bb.0: @@ -1849,6 +2157,20 @@ define float @v_exp2_f32_nnan(float %in) { ; VI-SDAG-NEXT: v_ldexp_f32 v0, v0, v1 ; VI-SDAG-NEXT: s_setpc_b64 s[30:31] ; +; VI-GISEL-LABEL: v_exp2_f32_nnan: +; VI-GISEL: ; %bb.0: +; VI-GISEL-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; VI-GISEL-NEXT: v_mov_b32_e32 v1, 0xc2fc0000 +; VI-GISEL-NEXT: v_mov_b32_e32 v2, 0x42800000 +; VI-GISEL-NEXT: v_cmp_lt_f32_e32 vcc, v0, v1 +; VI-GISEL-NEXT: v_cndmask_b32_e32 v1, 0, v2, vcc +; VI-GISEL-NEXT: v_add_f32_e32 v0, v0, v1 +; VI-GISEL-NEXT: v_exp_f32_e32 v0, v0 +; VI-GISEL-NEXT: v_not_b32_e32 v1, 63 +; VI-GISEL-NEXT: v_cndmask_b32_e32 v1, 0, v1, vcc +; VI-GISEL-NEXT: v_ldexp_f32 v0, v0, v1 +; VI-GISEL-NEXT: s_setpc_b64 s[30:31] +; ; GFX900-SDAG-LABEL: v_exp2_f32_nnan: ; GFX900-SDAG: ; %bb.0: ; GFX900-SDAG-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) @@ -1863,6 +2185,20 @@ define float @v_exp2_f32_nnan(float %in) { ; GFX900-SDAG-NEXT: v_ldexp_f32 v0, v0, v1 ; GFX900-SDAG-NEXT: s_setpc_b64 s[30:31] ; 
+; GFX900-GISEL-LABEL: v_exp2_f32_nnan: +; GFX900-GISEL: ; %bb.0: +; GFX900-GISEL-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX900-GISEL-NEXT: v_mov_b32_e32 v1, 0xc2fc0000 +; GFX900-GISEL-NEXT: v_mov_b32_e32 v2, 0x42800000 +; GFX900-GISEL-NEXT: v_cmp_lt_f32_e32 vcc, v0, v1 +; GFX900-GISEL-NEXT: v_cndmask_b32_e32 v1, 0, v2, vcc +; GFX900-GISEL-NEXT: v_add_f32_e32 v0, v0, v1 +; GFX900-GISEL-NEXT: v_exp_f32_e32 v0, v0 +; GFX900-GISEL-NEXT: v_not_b32_e32 v1, 63 +; GFX900-GISEL-NEXT: v_cndmask_b32_e32 v1, 0, v1, vcc +; GFX900-GISEL-NEXT: v_ldexp_f32 v0, v0, v1 +; GFX900-GISEL-NEXT: s_setpc_b64 s[30:31] +; ; R600-LABEL: v_exp2_f32_nnan: ; R600: ; %bb.0: ; R600-NEXT: CF_END @@ -1911,19 +2247,19 @@ define float @v_exp2_f32_nnan_dynamic(float %in) #1 { ; SI-SDAG-NEXT: v_ldexp_f32_e32 v0, v0, v1 ; SI-SDAG-NEXT: s_setpc_b64 s[30:31] ; -; GCN-GISEL-LABEL: v_exp2_f32_nnan_dynamic: -; GCN-GISEL: ; %bb.0: -; GCN-GISEL-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GCN-GISEL-NEXT: v_mov_b32_e32 v1, 0xc2fc0000 -; GCN-GISEL-NEXT: v_mov_b32_e32 v2, 0x42800000 -; GCN-GISEL-NEXT: v_cmp_lt_f32_e32 vcc, v0, v1 -; GCN-GISEL-NEXT: v_cndmask_b32_e32 v1, 0, v2, vcc -; GCN-GISEL-NEXT: v_add_f32_e32 v0, v0, v1 -; GCN-GISEL-NEXT: v_exp_f32_e32 v0, v0 -; GCN-GISEL-NEXT: v_mov_b32_e32 v1, 0x1f800000 -; GCN-GISEL-NEXT: v_cndmask_b32_e32 v1, 1.0, v1, vcc -; GCN-GISEL-NEXT: v_mul_f32_e32 v0, v0, v1 -; GCN-GISEL-NEXT: s_setpc_b64 s[30:31] +; SI-GISEL-LABEL: v_exp2_f32_nnan_dynamic: +; SI-GISEL: ; %bb.0: +; SI-GISEL-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; SI-GISEL-NEXT: v_mov_b32_e32 v1, 0xc2fc0000 +; SI-GISEL-NEXT: v_mov_b32_e32 v2, 0x42800000 +; SI-GISEL-NEXT: v_cmp_lt_f32_e32 vcc, v0, v1 +; SI-GISEL-NEXT: v_cndmask_b32_e32 v1, 0, v2, vcc +; SI-GISEL-NEXT: v_add_f32_e32 v0, v0, v1 +; SI-GISEL-NEXT: v_exp_f32_e32 v0, v0 +; SI-GISEL-NEXT: v_not_b32_e32 v1, 63 +; SI-GISEL-NEXT: v_cndmask_b32_e32 v1, 0, v1, vcc +; SI-GISEL-NEXT: v_ldexp_f32_e32 v0, v0, v1 +; SI-GISEL-NEXT: s_setpc_b64 
s[30:31] ; ; VI-SDAG-LABEL: v_exp2_f32_nnan_dynamic: ; VI-SDAG: ; %bb.0: @@ -1939,6 +2275,20 @@ define float @v_exp2_f32_nnan_dynamic(float %in) #1 { ; VI-SDAG-NEXT: v_ldexp_f32 v0, v0, v1 ; VI-SDAG-NEXT: s_setpc_b64 s[30:31] ; +; VI-GISEL-LABEL: v_exp2_f32_nnan_dynamic: +; VI-GISEL: ; %bb.0: +; VI-GISEL-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; VI-GISEL-NEXT: v_mov_b32_e32 v1, 0xc2fc0000 +; VI-GISEL-NEXT: v_mov_b32_e32 v2, 0x42800000 +; VI-GISEL-NEXT: v_cmp_lt_f32_e32 vcc, v0, v1 +; VI-GISEL-NEXT: v_cndmask_b32_e32 v1, 0, v2, vcc +; VI-GISEL-NEXT: v_add_f32_e32 v0, v0, v1 +; VI-GISEL-NEXT: v_exp_f32_e32 v0, v0 +; VI-GISEL-NEXT: v_not_b32_e32 v1, 63 +; VI-GISEL-NEXT: v_cndmask_b32_e32 v1, 0, v1, vcc +; VI-GISEL-NEXT: v_ldexp_f32 v0, v0, v1 +; VI-GISEL-NEXT: s_setpc_b64 s[30:31] +; ; GFX900-SDAG-LABEL: v_exp2_f32_nnan_dynamic: ; GFX900-SDAG: ; %bb.0: ; GFX900-SDAG-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) @@ -1953,6 +2303,20 @@ define float @v_exp2_f32_nnan_dynamic(float %in) #1 { ; GFX900-SDAG-NEXT: v_ldexp_f32 v0, v0, v1 ; GFX900-SDAG-NEXT: s_setpc_b64 s[30:31] ; +; GFX900-GISEL-LABEL: v_exp2_f32_nnan_dynamic: +; GFX900-GISEL: ; %bb.0: +; GFX900-GISEL-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX900-GISEL-NEXT: v_mov_b32_e32 v1, 0xc2fc0000 +; GFX900-GISEL-NEXT: v_mov_b32_e32 v2, 0x42800000 +; GFX900-GISEL-NEXT: v_cmp_lt_f32_e32 vcc, v0, v1 +; GFX900-GISEL-NEXT: v_cndmask_b32_e32 v1, 0, v2, vcc +; GFX900-GISEL-NEXT: v_add_f32_e32 v0, v0, v1 +; GFX900-GISEL-NEXT: v_exp_f32_e32 v0, v0 +; GFX900-GISEL-NEXT: v_not_b32_e32 v1, 63 +; GFX900-GISEL-NEXT: v_cndmask_b32_e32 v1, 0, v1, vcc +; GFX900-GISEL-NEXT: v_ldexp_f32 v0, v0, v1 +; GFX900-GISEL-NEXT: s_setpc_b64 s[30:31] +; ; R600-LABEL: v_exp2_f32_nnan_dynamic: ; R600: ; %bb.0: ; R600-NEXT: CF_END @@ -2001,19 +2365,19 @@ define float @v_exp2_f32_ninf_dynamic(float %in) #1 { ; SI-SDAG-NEXT: v_ldexp_f32_e32 v0, v0, v1 ; SI-SDAG-NEXT: s_setpc_b64 s[30:31] ; -; GCN-GISEL-LABEL: v_exp2_f32_ninf_dynamic: -; 
GCN-GISEL: ; %bb.0: -; GCN-GISEL-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GCN-GISEL-NEXT: v_mov_b32_e32 v1, 0xc2fc0000 -; GCN-GISEL-NEXT: v_mov_b32_e32 v2, 0x42800000 -; GCN-GISEL-NEXT: v_cmp_lt_f32_e32 vcc, v0, v1 -; GCN-GISEL-NEXT: v_cndmask_b32_e32 v1, 0, v2, vcc -; GCN-GISEL-NEXT: v_add_f32_e32 v0, v0, v1 -; GCN-GISEL-NEXT: v_exp_f32_e32 v0, v0 -; GCN-GISEL-NEXT: v_mov_b32_e32 v1, 0x1f800000 -; GCN-GISEL-NEXT: v_cndmask_b32_e32 v1, 1.0, v1, vcc -; GCN-GISEL-NEXT: v_mul_f32_e32 v0, v0, v1 -; GCN-GISEL-NEXT: s_setpc_b64 s[30:31] +; SI-GISEL-LABEL: v_exp2_f32_ninf_dynamic: +; SI-GISEL: ; %bb.0: +; SI-GISEL-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; SI-GISEL-NEXT: v_mov_b32_e32 v1, 0xc2fc0000 +; SI-GISEL-NEXT: v_mov_b32_e32 v2, 0x42800000 +; SI-GISEL-NEXT: v_cmp_lt_f32_e32 vcc, v0, v1 +; SI-GISEL-NEXT: v_cndmask_b32_e32 v1, 0, v2, vcc +; SI-GISEL-NEXT: v_add_f32_e32 v0, v0, v1 +; SI-GISEL-NEXT: v_exp_f32_e32 v0, v0 +; SI-GISEL-NEXT: v_not_b32_e32 v1, 63 +; SI-GISEL-NEXT: v_cndmask_b32_e32 v1, 0, v1, vcc +; SI-GISEL-NEXT: v_ldexp_f32_e32 v0, v0, v1 +; SI-GISEL-NEXT: s_setpc_b64 s[30:31] ; ; VI-SDAG-LABEL: v_exp2_f32_ninf_dynamic: ; VI-SDAG: ; %bb.0: @@ -2029,6 +2393,20 @@ define float @v_exp2_f32_ninf_dynamic(float %in) #1 { ; VI-SDAG-NEXT: v_ldexp_f32 v0, v0, v1 ; VI-SDAG-NEXT: s_setpc_b64 s[30:31] ; +; VI-GISEL-LABEL: v_exp2_f32_ninf_dynamic: +; VI-GISEL: ; %bb.0: +; VI-GISEL-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; VI-GISEL-NEXT: v_mov_b32_e32 v1, 0xc2fc0000 +; VI-GISEL-NEXT: v_mov_b32_e32 v2, 0x42800000 +; VI-GISEL-NEXT: v_cmp_lt_f32_e32 vcc, v0, v1 +; VI-GISEL-NEXT: v_cndmask_b32_e32 v1, 0, v2, vcc +; VI-GISEL-NEXT: v_add_f32_e32 v0, v0, v1 +; VI-GISEL-NEXT: v_exp_f32_e32 v0, v0 +; VI-GISEL-NEXT: v_not_b32_e32 v1, 63 +; VI-GISEL-NEXT: v_cndmask_b32_e32 v1, 0, v1, vcc +; VI-GISEL-NEXT: v_ldexp_f32 v0, v0, v1 +; VI-GISEL-NEXT: s_setpc_b64 s[30:31] +; ; GFX900-SDAG-LABEL: v_exp2_f32_ninf_dynamic: ; GFX900-SDAG: ; %bb.0: ; GFX900-SDAG-NEXT: 
s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) @@ -2043,6 +2421,20 @@ define float @v_exp2_f32_ninf_dynamic(float %in) #1 { ; GFX900-SDAG-NEXT: v_ldexp_f32 v0, v0, v1 ; GFX900-SDAG-NEXT: s_setpc_b64 s[30:31] ; +; GFX900-GISEL-LABEL: v_exp2_f32_ninf_dynamic: +; GFX900-GISEL: ; %bb.0: +; GFX900-GISEL-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX900-GISEL-NEXT: v_mov_b32_e32 v1, 0xc2fc0000 +; GFX900-GISEL-NEXT: v_mov_b32_e32 v2, 0x42800000 +; GFX900-GISEL-NEXT: v_cmp_lt_f32_e32 vcc, v0, v1 +; GFX900-GISEL-NEXT: v_cndmask_b32_e32 v1, 0, v2, vcc +; GFX900-GISEL-NEXT: v_add_f32_e32 v0, v0, v1 +; GFX900-GISEL-NEXT: v_exp_f32_e32 v0, v0 +; GFX900-GISEL-NEXT: v_not_b32_e32 v1, 63 +; GFX900-GISEL-NEXT: v_cndmask_b32_e32 v1, 0, v1, vcc +; GFX900-GISEL-NEXT: v_ldexp_f32 v0, v0, v1 +; GFX900-GISEL-NEXT: s_setpc_b64 s[30:31] +; ; R600-LABEL: v_exp2_f32_ninf_dynamic: ; R600: ; %bb.0: ; R600-NEXT: CF_END @@ -2071,19 +2463,19 @@ define float @v_exp2_f32_nnan_ninf(float %in) { ; SI-SDAG-NEXT: v_ldexp_f32_e32 v0, v0, v1 ; SI-SDAG-NEXT: s_setpc_b64 s[30:31] ; -; GCN-GISEL-LABEL: v_exp2_f32_nnan_ninf: -; GCN-GISEL: ; %bb.0: -; GCN-GISEL-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GCN-GISEL-NEXT: v_mov_b32_e32 v1, 0xc2fc0000 -; GCN-GISEL-NEXT: v_mov_b32_e32 v2, 0x42800000 -; GCN-GISEL-NEXT: v_cmp_lt_f32_e32 vcc, v0, v1 -; GCN-GISEL-NEXT: v_cndmask_b32_e32 v1, 0, v2, vcc -; GCN-GISEL-NEXT: v_add_f32_e32 v0, v0, v1 -; GCN-GISEL-NEXT: v_exp_f32_e32 v0, v0 -; GCN-GISEL-NEXT: v_mov_b32_e32 v1, 0x1f800000 -; GCN-GISEL-NEXT: v_cndmask_b32_e32 v1, 1.0, v1, vcc -; GCN-GISEL-NEXT: v_mul_f32_e32 v0, v0, v1 -; GCN-GISEL-NEXT: s_setpc_b64 s[30:31] +; SI-GISEL-LABEL: v_exp2_f32_nnan_ninf: +; SI-GISEL: ; %bb.0: +; SI-GISEL-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; SI-GISEL-NEXT: v_mov_b32_e32 v1, 0xc2fc0000 +; SI-GISEL-NEXT: v_mov_b32_e32 v2, 0x42800000 +; SI-GISEL-NEXT: v_cmp_lt_f32_e32 vcc, v0, v1 +; SI-GISEL-NEXT: v_cndmask_b32_e32 v1, 0, v2, vcc +; SI-GISEL-NEXT: v_add_f32_e32 v0, v0, 
v1 +; SI-GISEL-NEXT: v_exp_f32_e32 v0, v0 +; SI-GISEL-NEXT: v_not_b32_e32 v1, 63 +; SI-GISEL-NEXT: v_cndmask_b32_e32 v1, 0, v1, vcc +; SI-GISEL-NEXT: v_ldexp_f32_e32 v0, v0, v1 +; SI-GISEL-NEXT: s_setpc_b64 s[30:31] ; ; VI-SDAG-LABEL: v_exp2_f32_nnan_ninf: ; VI-SDAG: ; %bb.0: @@ -2099,6 +2491,20 @@ define float @v_exp2_f32_nnan_ninf(float %in) { ; VI-SDAG-NEXT: v_ldexp_f32 v0, v0, v1 ; VI-SDAG-NEXT: s_setpc_b64 s[30:31] ; +; VI-GISEL-LABEL: v_exp2_f32_nnan_ninf: +; VI-GISEL: ; %bb.0: +; VI-GISEL-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; VI-GISEL-NEXT: v_mov_b32_e32 v1, 0xc2fc0000 +; VI-GISEL-NEXT: v_mov_b32_e32 v2, 0x42800000 +; VI-GISEL-NEXT: v_cmp_lt_f32_e32 vcc, v0, v1 +; VI-GISEL-NEXT: v_cndmask_b32_e32 v1, 0, v2, vcc +; VI-GISEL-NEXT: v_add_f32_e32 v0, v0, v1 +; VI-GISEL-NEXT: v_exp_f32_e32 v0, v0 +; VI-GISEL-NEXT: v_not_b32_e32 v1, 63 +; VI-GISEL-NEXT: v_cndmask_b32_e32 v1, 0, v1, vcc +; VI-GISEL-NEXT: v_ldexp_f32 v0, v0, v1 +; VI-GISEL-NEXT: s_setpc_b64 s[30:31] +; ; GFX900-SDAG-LABEL: v_exp2_f32_nnan_ninf: ; GFX900-SDAG: ; %bb.0: ; GFX900-SDAG-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) @@ -2113,6 +2519,20 @@ define float @v_exp2_f32_nnan_ninf(float %in) { ; GFX900-SDAG-NEXT: v_ldexp_f32 v0, v0, v1 ; GFX900-SDAG-NEXT: s_setpc_b64 s[30:31] ; +; GFX900-GISEL-LABEL: v_exp2_f32_nnan_ninf: +; GFX900-GISEL: ; %bb.0: +; GFX900-GISEL-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX900-GISEL-NEXT: v_mov_b32_e32 v1, 0xc2fc0000 +; GFX900-GISEL-NEXT: v_mov_b32_e32 v2, 0x42800000 +; GFX900-GISEL-NEXT: v_cmp_lt_f32_e32 vcc, v0, v1 +; GFX900-GISEL-NEXT: v_cndmask_b32_e32 v1, 0, v2, vcc +; GFX900-GISEL-NEXT: v_add_f32_e32 v0, v0, v1 +; GFX900-GISEL-NEXT: v_exp_f32_e32 v0, v0 +; GFX900-GISEL-NEXT: v_not_b32_e32 v1, 63 +; GFX900-GISEL-NEXT: v_cndmask_b32_e32 v1, 0, v1, vcc +; GFX900-GISEL-NEXT: v_ldexp_f32 v0, v0, v1 +; GFX900-GISEL-NEXT: s_setpc_b64 s[30:31] +; ; R600-LABEL: v_exp2_f32_nnan_ninf: ; R600: ; %bb.0: ; R600-NEXT: CF_END @@ -2161,19 +2581,19 @@ 
define float @v_exp2_f32_nnan_ninf_dynamic(float %in) #1 { ; SI-SDAG-NEXT: v_ldexp_f32_e32 v0, v0, v1 ; SI-SDAG-NEXT: s_setpc_b64 s[30:31] ; -; GCN-GISEL-LABEL: v_exp2_f32_nnan_ninf_dynamic: -; GCN-GISEL: ; %bb.0: -; GCN-GISEL-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GCN-GISEL-NEXT: v_mov_b32_e32 v1, 0xc2fc0000 -; GCN-GISEL-NEXT: v_mov_b32_e32 v2, 0x42800000 -; GCN-GISEL-NEXT: v_cmp_lt_f32_e32 vcc, v0, v1 -; GCN-GISEL-NEXT: v_cndmask_b32_e32 v1, 0, v2, vcc -; GCN-GISEL-NEXT: v_add_f32_e32 v0, v0, v1 -; GCN-GISEL-NEXT: v_exp_f32_e32 v0, v0 -; GCN-GISEL-NEXT: v_mov_b32_e32 v1, 0x1f800000 -; GCN-GISEL-NEXT: v_cndmask_b32_e32 v1, 1.0, v1, vcc -; GCN-GISEL-NEXT: v_mul_f32_e32 v0, v0, v1 -; GCN-GISEL-NEXT: s_setpc_b64 s[30:31] +; SI-GISEL-LABEL: v_exp2_f32_nnan_ninf_dynamic: +; SI-GISEL: ; %bb.0: +; SI-GISEL-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; SI-GISEL-NEXT: v_mov_b32_e32 v1, 0xc2fc0000 +; SI-GISEL-NEXT: v_mov_b32_e32 v2, 0x42800000 +; SI-GISEL-NEXT: v_cmp_lt_f32_e32 vcc, v0, v1 +; SI-GISEL-NEXT: v_cndmask_b32_e32 v1, 0, v2, vcc +; SI-GISEL-NEXT: v_add_f32_e32 v0, v0, v1 +; SI-GISEL-NEXT: v_exp_f32_e32 v0, v0 +; SI-GISEL-NEXT: v_not_b32_e32 v1, 63 +; SI-GISEL-NEXT: v_cndmask_b32_e32 v1, 0, v1, vcc +; SI-GISEL-NEXT: v_ldexp_f32_e32 v0, v0, v1 +; SI-GISEL-NEXT: s_setpc_b64 s[30:31] ; ; VI-SDAG-LABEL: v_exp2_f32_nnan_ninf_dynamic: ; VI-SDAG: ; %bb.0: @@ -2189,6 +2609,20 @@ define float @v_exp2_f32_nnan_ninf_dynamic(float %in) #1 { ; VI-SDAG-NEXT: v_ldexp_f32 v0, v0, v1 ; VI-SDAG-NEXT: s_setpc_b64 s[30:31] ; +; VI-GISEL-LABEL: v_exp2_f32_nnan_ninf_dynamic: +; VI-GISEL: ; %bb.0: +; VI-GISEL-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; VI-GISEL-NEXT: v_mov_b32_e32 v1, 0xc2fc0000 +; VI-GISEL-NEXT: v_mov_b32_e32 v2, 0x42800000 +; VI-GISEL-NEXT: v_cmp_lt_f32_e32 vcc, v0, v1 +; VI-GISEL-NEXT: v_cndmask_b32_e32 v1, 0, v2, vcc +; VI-GISEL-NEXT: v_add_f32_e32 v0, v0, v1 +; VI-GISEL-NEXT: v_exp_f32_e32 v0, v0 +; VI-GISEL-NEXT: v_not_b32_e32 v1, 63 +; 
VI-GISEL-NEXT: v_cndmask_b32_e32 v1, 0, v1, vcc +; VI-GISEL-NEXT: v_ldexp_f32 v0, v0, v1 +; VI-GISEL-NEXT: s_setpc_b64 s[30:31] +; ; GFX900-SDAG-LABEL: v_exp2_f32_nnan_ninf_dynamic: ; GFX900-SDAG: ; %bb.0: ; GFX900-SDAG-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) @@ -2203,6 +2637,20 @@ define float @v_exp2_f32_nnan_ninf_dynamic(float %in) #1 { ; GFX900-SDAG-NEXT: v_ldexp_f32 v0, v0, v1 ; GFX900-SDAG-NEXT: s_setpc_b64 s[30:31] ; +; GFX900-GISEL-LABEL: v_exp2_f32_nnan_ninf_dynamic: +; GFX900-GISEL: ; %bb.0: +; GFX900-GISEL-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX900-GISEL-NEXT: v_mov_b32_e32 v1, 0xc2fc0000 +; GFX900-GISEL-NEXT: v_mov_b32_e32 v2, 0x42800000 +; GFX900-GISEL-NEXT: v_cmp_lt_f32_e32 vcc, v0, v1 +; GFX900-GISEL-NEXT: v_cndmask_b32_e32 v1, 0, v2, vcc +; GFX900-GISEL-NEXT: v_add_f32_e32 v0, v0, v1 +; GFX900-GISEL-NEXT: v_exp_f32_e32 v0, v0 +; GFX900-GISEL-NEXT: v_not_b32_e32 v1, 63 +; GFX900-GISEL-NEXT: v_cndmask_b32_e32 v1, 0, v1, vcc +; GFX900-GISEL-NEXT: v_ldexp_f32 v0, v0, v1 +; GFX900-GISEL-NEXT: s_setpc_b64 s[30:31] +; ; R600-LABEL: v_exp2_f32_nnan_ninf_dynamic: ; R600: ; %bb.0: ; R600-NEXT: CF_END @@ -2251,19 +2699,19 @@ define float @v_exp2_f32_dynamic_mode(float %in) #1 { ; SI-SDAG-NEXT: v_ldexp_f32_e32 v0, v0, v1 ; SI-SDAG-NEXT: s_setpc_b64 s[30:31] ; -; GCN-GISEL-LABEL: v_exp2_f32_dynamic_mode: -; GCN-GISEL: ; %bb.0: -; GCN-GISEL-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GCN-GISEL-NEXT: v_mov_b32_e32 v1, 0xc2fc0000 -; GCN-GISEL-NEXT: v_mov_b32_e32 v2, 0x42800000 -; GCN-GISEL-NEXT: v_cmp_lt_f32_e32 vcc, v0, v1 -; GCN-GISEL-NEXT: v_cndmask_b32_e32 v1, 0, v2, vcc -; GCN-GISEL-NEXT: v_add_f32_e32 v0, v0, v1 -; GCN-GISEL-NEXT: v_exp_f32_e32 v0, v0 -; GCN-GISEL-NEXT: v_mov_b32_e32 v1, 0x1f800000 -; GCN-GISEL-NEXT: v_cndmask_b32_e32 v1, 1.0, v1, vcc -; GCN-GISEL-NEXT: v_mul_f32_e32 v0, v0, v1 -; GCN-GISEL-NEXT: s_setpc_b64 s[30:31] +; SI-GISEL-LABEL: v_exp2_f32_dynamic_mode: +; SI-GISEL: ; %bb.0: +; SI-GISEL-NEXT: s_waitcnt vmcnt(0) 
expcnt(0) lgkmcnt(0) +; SI-GISEL-NEXT: v_mov_b32_e32 v1, 0xc2fc0000 +; SI-GISEL-NEXT: v_mov_b32_e32 v2, 0x42800000 +; SI-GISEL-NEXT: v_cmp_lt_f32_e32 vcc, v0, v1 +; SI-GISEL-NEXT: v_cndmask_b32_e32 v1, 0, v2, vcc +; SI-GISEL-NEXT: v_add_f32_e32 v0, v0, v1 +; SI-GISEL-NEXT: v_exp_f32_e32 v0, v0 +; SI-GISEL-NEXT: v_not_b32_e32 v1, 63 +; SI-GISEL-NEXT: v_cndmask_b32_e32 v1, 0, v1, vcc +; SI-GISEL-NEXT: v_ldexp_f32_e32 v0, v0, v1 +; SI-GISEL-NEXT: s_setpc_b64 s[30:31] ; ; VI-SDAG-LABEL: v_exp2_f32_dynamic_mode: ; VI-SDAG: ; %bb.0: @@ -2279,6 +2727,20 @@ define float @v_exp2_f32_dynamic_mode(float %in) #1 { ; VI-SDAG-NEXT: v_ldexp_f32 v0, v0, v1 ; VI-SDAG-NEXT: s_setpc_b64 s[30:31] ; +; VI-GISEL-LABEL: v_exp2_f32_dynamic_mode: +; VI-GISEL: ; %bb.0: +; VI-GISEL-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; VI-GISEL-NEXT: v_mov_b32_e32 v1, 0xc2fc0000 +; VI-GISEL-NEXT: v_mov_b32_e32 v2, 0x42800000 +; VI-GISEL-NEXT: v_cmp_lt_f32_e32 vcc, v0, v1 +; VI-GISEL-NEXT: v_cndmask_b32_e32 v1, 0, v2, vcc +; VI-GISEL-NEXT: v_add_f32_e32 v0, v0, v1 +; VI-GISEL-NEXT: v_exp_f32_e32 v0, v0 +; VI-GISEL-NEXT: v_not_b32_e32 v1, 63 +; VI-GISEL-NEXT: v_cndmask_b32_e32 v1, 0, v1, vcc +; VI-GISEL-NEXT: v_ldexp_f32 v0, v0, v1 +; VI-GISEL-NEXT: s_setpc_b64 s[30:31] +; ; GFX900-SDAG-LABEL: v_exp2_f32_dynamic_mode: ; GFX900-SDAG: ; %bb.0: ; GFX900-SDAG-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) @@ -2293,6 +2755,20 @@ define float @v_exp2_f32_dynamic_mode(float %in) #1 { ; GFX900-SDAG-NEXT: v_ldexp_f32 v0, v0, v1 ; GFX900-SDAG-NEXT: s_setpc_b64 s[30:31] ; +; GFX900-GISEL-LABEL: v_exp2_f32_dynamic_mode: +; GFX900-GISEL: ; %bb.0: +; GFX900-GISEL-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX900-GISEL-NEXT: v_mov_b32_e32 v1, 0xc2fc0000 +; GFX900-GISEL-NEXT: v_mov_b32_e32 v2, 0x42800000 +; GFX900-GISEL-NEXT: v_cmp_lt_f32_e32 vcc, v0, v1 +; GFX900-GISEL-NEXT: v_cndmask_b32_e32 v1, 0, v2, vcc +; GFX900-GISEL-NEXT: v_add_f32_e32 v0, v0, v1 +; GFX900-GISEL-NEXT: v_exp_f32_e32 v0, v0 +; 
GFX900-GISEL-NEXT: v_not_b32_e32 v1, 63 +; GFX900-GISEL-NEXT: v_cndmask_b32_e32 v1, 0, v1, vcc +; GFX900-GISEL-NEXT: v_ldexp_f32 v0, v0, v1 +; GFX900-GISEL-NEXT: s_setpc_b64 s[30:31] +; ; R600-LABEL: v_exp2_f32_dynamic_mode: ; R600: ; %bb.0: ; R600-NEXT: CF_END @@ -2313,20 +2789,50 @@ define float @v_exp2_f32_undef() { ; GCN-SDAG-NEXT: v_exp_f32_e32 v0, 0x7fc00000 ; GCN-SDAG-NEXT: s_setpc_b64 s[30:31] ; -; GCN-GISEL-LABEL: v_exp2_f32_undef: -; GCN-GISEL: ; %bb.0: -; GCN-GISEL-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GCN-GISEL-NEXT: v_mov_b32_e32 v0, 0xc2fc0000 -; GCN-GISEL-NEXT: v_mov_b32_e32 v1, 0x42800000 -; GCN-GISEL-NEXT: v_add_f32_e32 v1, s4, v1 -; GCN-GISEL-NEXT: v_add_f32_e64 v2, s4, 0 -; GCN-GISEL-NEXT: v_cmp_lt_f32_e32 vcc, s4, v0 -; GCN-GISEL-NEXT: v_cndmask_b32_e32 v0, v2, v1, vcc -; GCN-GISEL-NEXT: v_exp_f32_e32 v0, v0 -; GCN-GISEL-NEXT: v_mov_b32_e32 v1, 0x1f800000 -; GCN-GISEL-NEXT: v_cndmask_b32_e32 v1, 1.0, v1, vcc -; GCN-GISEL-NEXT: v_mul_f32_e32 v0, v0, v1 -; GCN-GISEL-NEXT: s_setpc_b64 s[30:31] +; SI-GISEL-LABEL: v_exp2_f32_undef: +; SI-GISEL: ; %bb.0: +; SI-GISEL-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; SI-GISEL-NEXT: v_mov_b32_e32 v0, 0xc2fc0000 +; SI-GISEL-NEXT: v_mov_b32_e32 v1, 0x42800000 +; SI-GISEL-NEXT: v_add_f32_e32 v1, s4, v1 +; SI-GISEL-NEXT: v_add_f32_e64 v2, s4, 0 +; SI-GISEL-NEXT: v_cmp_lt_f32_e32 vcc, s4, v0 +; SI-GISEL-NEXT: v_cndmask_b32_e32 v0, v2, v1, vcc +; SI-GISEL-NEXT: v_exp_f32_e32 v0, v0 +; SI-GISEL-NEXT: v_not_b32_e32 v1, 63 +; SI-GISEL-NEXT: v_cndmask_b32_e32 v1, 0, v1, vcc +; SI-GISEL-NEXT: v_ldexp_f32_e32 v0, v0, v1 +; SI-GISEL-NEXT: s_setpc_b64 s[30:31] +; +; VI-GISEL-LABEL: v_exp2_f32_undef: +; VI-GISEL: ; %bb.0: +; VI-GISEL-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; VI-GISEL-NEXT: v_mov_b32_e32 v0, 0xc2fc0000 +; VI-GISEL-NEXT: v_mov_b32_e32 v1, 0x42800000 +; VI-GISEL-NEXT: v_add_f32_e32 v1, s4, v1 +; VI-GISEL-NEXT: v_add_f32_e64 v2, s4, 0 +; VI-GISEL-NEXT: v_cmp_lt_f32_e32 vcc, s4, v0 +; 
VI-GISEL-NEXT: v_cndmask_b32_e32 v0, v2, v1, vcc +; VI-GISEL-NEXT: v_exp_f32_e32 v0, v0 +; VI-GISEL-NEXT: v_not_b32_e32 v1, 63 +; VI-GISEL-NEXT: v_cndmask_b32_e32 v1, 0, v1, vcc +; VI-GISEL-NEXT: v_ldexp_f32 v0, v0, v1 +; VI-GISEL-NEXT: s_setpc_b64 s[30:31] +; +; GFX900-GISEL-LABEL: v_exp2_f32_undef: +; GFX900-GISEL: ; %bb.0: +; GFX900-GISEL-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX900-GISEL-NEXT: v_mov_b32_e32 v0, 0xc2fc0000 +; GFX900-GISEL-NEXT: v_mov_b32_e32 v1, 0x42800000 +; GFX900-GISEL-NEXT: v_add_f32_e32 v1, s4, v1 +; GFX900-GISEL-NEXT: v_add_f32_e64 v2, s4, 0 +; GFX900-GISEL-NEXT: v_cmp_lt_f32_e32 vcc, s4, v0 +; GFX900-GISEL-NEXT: v_cndmask_b32_e32 v0, v2, v1, vcc +; GFX900-GISEL-NEXT: v_exp_f32_e32 v0, v0 +; GFX900-GISEL-NEXT: v_not_b32_e32 v1, 63 +; GFX900-GISEL-NEXT: v_cndmask_b32_e32 v1, 0, v1, vcc +; GFX900-GISEL-NEXT: v_ldexp_f32 v0, v0, v1 +; GFX900-GISEL-NEXT: s_setpc_b64 s[30:31] ; ; R600-LABEL: v_exp2_f32_undef: ; R600: ; %bb.0: @@ -3359,19 +3865,19 @@ define float @v_exp2_f32_contract(float %in) { ; SI-SDAG-NEXT: v_ldexp_f32_e32 v0, v0, v1 ; SI-SDAG-NEXT: s_setpc_b64 s[30:31] ; -; GCN-GISEL-LABEL: v_exp2_f32_contract: -; GCN-GISEL: ; %bb.0: -; GCN-GISEL-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GCN-GISEL-NEXT: v_mov_b32_e32 v1, 0xc2fc0000 -; GCN-GISEL-NEXT: v_mov_b32_e32 v2, 0x42800000 -; GCN-GISEL-NEXT: v_cmp_lt_f32_e32 vcc, v0, v1 -; GCN-GISEL-NEXT: v_cndmask_b32_e32 v1, 0, v2, vcc -; GCN-GISEL-NEXT: v_add_f32_e32 v0, v0, v1 -; GCN-GISEL-NEXT: v_exp_f32_e32 v0, v0 -; GCN-GISEL-NEXT: v_mov_b32_e32 v1, 0x1f800000 -; GCN-GISEL-NEXT: v_cndmask_b32_e32 v1, 1.0, v1, vcc -; GCN-GISEL-NEXT: v_mul_f32_e32 v0, v0, v1 -; GCN-GISEL-NEXT: s_setpc_b64 s[30:31] +; SI-GISEL-LABEL: v_exp2_f32_contract: +; SI-GISEL: ; %bb.0: +; SI-GISEL-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; SI-GISEL-NEXT: v_mov_b32_e32 v1, 0xc2fc0000 +; SI-GISEL-NEXT: v_mov_b32_e32 v2, 0x42800000 +; SI-GISEL-NEXT: v_cmp_lt_f32_e32 vcc, v0, v1 +; SI-GISEL-NEXT: 
v_cndmask_b32_e32 v1, 0, v2, vcc +; SI-GISEL-NEXT: v_add_f32_e32 v0, v0, v1 +; SI-GISEL-NEXT: v_exp_f32_e32 v0, v0 +; SI-GISEL-NEXT: v_not_b32_e32 v1, 63 +; SI-GISEL-NEXT: v_cndmask_b32_e32 v1, 0, v1, vcc +; SI-GISEL-NEXT: v_ldexp_f32_e32 v0, v0, v1 +; SI-GISEL-NEXT: s_setpc_b64 s[30:31] ; ; VI-SDAG-LABEL: v_exp2_f32_contract: ; VI-SDAG: ; %bb.0: @@ -3387,6 +3893,20 @@ define float @v_exp2_f32_contract(float %in) { ; VI-SDAG-NEXT: v_ldexp_f32 v0, v0, v1 ; VI-SDAG-NEXT: s_setpc_b64 s[30:31] ; +; VI-GISEL-LABEL: v_exp2_f32_contract: +; VI-GISEL: ; %bb.0: +; VI-GISEL-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; VI-GISEL-NEXT: v_mov_b32_e32 v1, 0xc2fc0000 +; VI-GISEL-NEXT: v_mov_b32_e32 v2, 0x42800000 +; VI-GISEL-NEXT: v_cmp_lt_f32_e32 vcc, v0, v1 +; VI-GISEL-NEXT: v_cndmask_b32_e32 v1, 0, v2, vcc +; VI-GISEL-NEXT: v_add_f32_e32 v0, v0, v1 +; VI-GISEL-NEXT: v_exp_f32_e32 v0, v0 +; VI-GISEL-NEXT: v_not_b32_e32 v1, 63 +; VI-GISEL-NEXT: v_cndmask_b32_e32 v1, 0, v1, vcc +; VI-GISEL-NEXT: v_ldexp_f32 v0, v0, v1 +; VI-GISEL-NEXT: s_setpc_b64 s[30:31] +; ; GFX900-SDAG-LABEL: v_exp2_f32_contract: ; GFX900-SDAG: ; %bb.0: ; GFX900-SDAG-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) @@ -3401,6 +3921,20 @@ define float @v_exp2_f32_contract(float %in) { ; GFX900-SDAG-NEXT: v_ldexp_f32 v0, v0, v1 ; GFX900-SDAG-NEXT: s_setpc_b64 s[30:31] ; +; GFX900-GISEL-LABEL: v_exp2_f32_contract: +; GFX900-GISEL: ; %bb.0: +; GFX900-GISEL-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX900-GISEL-NEXT: v_mov_b32_e32 v1, 0xc2fc0000 +; GFX900-GISEL-NEXT: v_mov_b32_e32 v2, 0x42800000 +; GFX900-GISEL-NEXT: v_cmp_lt_f32_e32 vcc, v0, v1 +; GFX900-GISEL-NEXT: v_cndmask_b32_e32 v1, 0, v2, vcc +; GFX900-GISEL-NEXT: v_add_f32_e32 v0, v0, v1 +; GFX900-GISEL-NEXT: v_exp_f32_e32 v0, v0 +; GFX900-GISEL-NEXT: v_not_b32_e32 v1, 63 +; GFX900-GISEL-NEXT: v_cndmask_b32_e32 v1, 0, v1, vcc +; GFX900-GISEL-NEXT: v_ldexp_f32 v0, v0, v1 +; GFX900-GISEL-NEXT: s_setpc_b64 s[30:31] +; ; R600-LABEL: v_exp2_f32_contract: 
; R600: ; %bb.0: ; R600-NEXT: CF_END @@ -3449,19 +3983,19 @@ define float @v_exp2_f32_contract_nnan_ninf(float %in) { ; SI-SDAG-NEXT: v_ldexp_f32_e32 v0, v0, v1 ; SI-SDAG-NEXT: s_setpc_b64 s[30:31] ; -; GCN-GISEL-LABEL: v_exp2_f32_contract_nnan_ninf: -; GCN-GISEL: ; %bb.0: -; GCN-GISEL-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GCN-GISEL-NEXT: v_mov_b32_e32 v1, 0xc2fc0000 -; GCN-GISEL-NEXT: v_mov_b32_e32 v2, 0x42800000 -; GCN-GISEL-NEXT: v_cmp_lt_f32_e32 vcc, v0, v1 -; GCN-GISEL-NEXT: v_cndmask_b32_e32 v1, 0, v2, vcc -; GCN-GISEL-NEXT: v_add_f32_e32 v0, v0, v1 -; GCN-GISEL-NEXT: v_exp_f32_e32 v0, v0 -; GCN-GISEL-NEXT: v_mov_b32_e32 v1, 0x1f800000 -; GCN-GISEL-NEXT: v_cndmask_b32_e32 v1, 1.0, v1, vcc -; GCN-GISEL-NEXT: v_mul_f32_e32 v0, v0, v1 -; GCN-GISEL-NEXT: s_setpc_b64 s[30:31] +; SI-GISEL-LABEL: v_exp2_f32_contract_nnan_ninf: +; SI-GISEL: ; %bb.0: +; SI-GISEL-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; SI-GISEL-NEXT: v_mov_b32_e32 v1, 0xc2fc0000 +; SI-GISEL-NEXT: v_mov_b32_e32 v2, 0x42800000 +; SI-GISEL-NEXT: v_cmp_lt_f32_e32 vcc, v0, v1 +; SI-GISEL-NEXT: v_cndmask_b32_e32 v1, 0, v2, vcc +; SI-GISEL-NEXT: v_add_f32_e32 v0, v0, v1 +; SI-GISEL-NEXT: v_exp_f32_e32 v0, v0 +; SI-GISEL-NEXT: v_not_b32_e32 v1, 63 +; SI-GISEL-NEXT: v_cndmask_b32_e32 v1, 0, v1, vcc +; SI-GISEL-NEXT: v_ldexp_f32_e32 v0, v0, v1 +; SI-GISEL-NEXT: s_setpc_b64 s[30:31] ; ; VI-SDAG-LABEL: v_exp2_f32_contract_nnan_ninf: ; VI-SDAG: ; %bb.0: @@ -3477,6 +4011,20 @@ define float @v_exp2_f32_contract_nnan_ninf(float %in) { ; VI-SDAG-NEXT: v_ldexp_f32 v0, v0, v1 ; VI-SDAG-NEXT: s_setpc_b64 s[30:31] ; +; VI-GISEL-LABEL: v_exp2_f32_contract_nnan_ninf: +; VI-GISEL: ; %bb.0: +; VI-GISEL-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; VI-GISEL-NEXT: v_mov_b32_e32 v1, 0xc2fc0000 +; VI-GISEL-NEXT: v_mov_b32_e32 v2, 0x42800000 +; VI-GISEL-NEXT: v_cmp_lt_f32_e32 vcc, v0, v1 +; VI-GISEL-NEXT: v_cndmask_b32_e32 v1, 0, v2, vcc +; VI-GISEL-NEXT: v_add_f32_e32 v0, v0, v1 +; VI-GISEL-NEXT: 
v_exp_f32_e32 v0, v0 +; VI-GISEL-NEXT: v_not_b32_e32 v1, 63 +; VI-GISEL-NEXT: v_cndmask_b32_e32 v1, 0, v1, vcc +; VI-GISEL-NEXT: v_ldexp_f32 v0, v0, v1 +; VI-GISEL-NEXT: s_setpc_b64 s[30:31] +; ; GFX900-SDAG-LABEL: v_exp2_f32_contract_nnan_ninf: ; GFX900-SDAG: ; %bb.0: ; GFX900-SDAG-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) @@ -3491,6 +4039,20 @@ define float @v_exp2_f32_contract_nnan_ninf(float %in) { ; GFX900-SDAG-NEXT: v_ldexp_f32 v0, v0, v1 ; GFX900-SDAG-NEXT: s_setpc_b64 s[30:31] ; +; GFX900-GISEL-LABEL: v_exp2_f32_contract_nnan_ninf: +; GFX900-GISEL: ; %bb.0: +; GFX900-GISEL-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX900-GISEL-NEXT: v_mov_b32_e32 v1, 0xc2fc0000 +; GFX900-GISEL-NEXT: v_mov_b32_e32 v2, 0x42800000 +; GFX900-GISEL-NEXT: v_cmp_lt_f32_e32 vcc, v0, v1 +; GFX900-GISEL-NEXT: v_cndmask_b32_e32 v1, 0, v2, vcc +; GFX900-GISEL-NEXT: v_add_f32_e32 v0, v0, v1 +; GFX900-GISEL-NEXT: v_exp_f32_e32 v0, v0 +; GFX900-GISEL-NEXT: v_not_b32_e32 v1, 63 +; GFX900-GISEL-NEXT: v_cndmask_b32_e32 v1, 0, v1, vcc +; GFX900-GISEL-NEXT: v_ldexp_f32 v0, v0, v1 +; GFX900-GISEL-NEXT: s_setpc_b64 s[30:31] +; ; R600-LABEL: v_exp2_f32_contract_nnan_ninf: ; R600: ; %bb.0: ; R600-NEXT: CF_END @@ -3518,3 +4080,5 @@ declare <3 x half> @llvm.exp2.v3f16(<3 x half>) #2 attributes #0 = { "denormal-fp-math-f32"="ieee,preserve-sign" } attributes #1 = { "denormal-fp-math-f32"="dynamic,dynamic" } attributes #2 = { nocallback nofree nosync nounwind speculatable willreturn memory(none) } +;; NOTE: These prefixes are unused and the list is autogenerated. 
Do not add tests below this line: +; GCN-GISEL: {{.*}} diff --git a/llvm/test/CodeGen/AMDGPU/llvm.log.ll b/llvm/test/CodeGen/AMDGPU/llvm.log.ll index 218e41faa703d..b850428a03c05 100644 --- a/llvm/test/CodeGen/AMDGPU/llvm.log.ll +++ b/llvm/test/CodeGen/AMDGPU/llvm.log.ll @@ -45,16 +45,17 @@ define amdgpu_kernel void @s_log_f32(ptr addrspace(1) %out, float %in) { ; SI-GISEL-NEXT: s_load_dword s0, s[4:5], 0xb ; SI-GISEL-NEXT: s_load_dwordx2 s[4:5], s[4:5], 0x9 ; SI-GISEL-NEXT: v_mov_b32_e32 v0, 0x800000 -; SI-GISEL-NEXT: v_mov_b32_e32 v1, 0x4f800000 +; SI-GISEL-NEXT: v_mov_b32_e32 v1, 0x3f317217 ; SI-GISEL-NEXT: v_mov_b32_e32 v2, 0x3377d1cf ; SI-GISEL-NEXT: s_waitcnt lgkmcnt(0) ; SI-GISEL-NEXT: v_cmp_lt_f32_e32 vcc, s0, v0 -; SI-GISEL-NEXT: v_cndmask_b32_e32 v0, 1.0, v1, vcc -; SI-GISEL-NEXT: v_mul_f32_e32 v0, s0, v0 +; SI-GISEL-NEXT: v_cndmask_b32_e64 v0, 0, 1, vcc +; SI-GISEL-NEXT: v_lshlrev_b32_e32 v0, 5, v0 +; SI-GISEL-NEXT: v_ldexp_f32_e32 v0, s0, v0 ; SI-GISEL-NEXT: v_log_f32_e32 v0, v0 -; SI-GISEL-NEXT: v_mov_b32_e32 v1, 0x3f317217 ; SI-GISEL-NEXT: v_mov_b32_e32 v3, 0x7f800000 ; SI-GISEL-NEXT: s_mov_b32 s6, -1 +; SI-GISEL-NEXT: s_mov_b32 s7, 0xf000 ; SI-GISEL-NEXT: v_mul_f32_e32 v4, 0x3f317217, v0 ; SI-GISEL-NEXT: v_fma_f32 v1, v0, v1, -v4 ; SI-GISEL-NEXT: v_fma_f32 v1, v0, v2, v1 @@ -64,7 +65,6 @@ define amdgpu_kernel void @s_log_f32(ptr addrspace(1) %out, float %in) { ; SI-GISEL-NEXT: v_mov_b32_e32 v1, 0x41b17218 ; SI-GISEL-NEXT: v_cndmask_b32_e32 v1, 0, v1, vcc ; SI-GISEL-NEXT: v_sub_f32_e32 v0, v0, v1 -; SI-GISEL-NEXT: s_mov_b32 s7, 0xf000 ; SI-GISEL-NEXT: buffer_store_dword v0, off, s[4:7], 0 ; SI-GISEL-NEXT: s_endpgm ; @@ -104,25 +104,25 @@ define amdgpu_kernel void @s_log_f32(ptr addrspace(1) %out, float %in) { ; VI-GISEL-NEXT: s_load_dword s0, s[4:5], 0x2c ; VI-GISEL-NEXT: s_load_dwordx2 s[2:3], s[4:5], 0x24 ; VI-GISEL-NEXT: v_mov_b32_e32 v0, 0x800000 -; VI-GISEL-NEXT: v_mov_b32_e32 v1, 0x4f800000 +; VI-GISEL-NEXT: v_mov_b32_e32 v1, 0x7f800000 ; 
VI-GISEL-NEXT: s_waitcnt lgkmcnt(0) ; VI-GISEL-NEXT: v_cmp_lt_f32_e32 vcc, s0, v0 -; VI-GISEL-NEXT: v_cndmask_b32_e32 v0, 1.0, v1, vcc -; VI-GISEL-NEXT: v_mul_f32_e32 v0, s0, v0 +; VI-GISEL-NEXT: v_cndmask_b32_e64 v0, 0, 1, vcc +; VI-GISEL-NEXT: v_lshlrev_b32_e32 v0, 5, v0 +; VI-GISEL-NEXT: v_ldexp_f32 v0, s0, v0 ; VI-GISEL-NEXT: v_log_f32_e32 v0, v0 -; VI-GISEL-NEXT: v_and_b32_e32 v1, 0xfffff000, v0 -; VI-GISEL-NEXT: v_sub_f32_e32 v2, v0, v1 -; VI-GISEL-NEXT: v_mul_f32_e32 v3, 0x3805fdf4, v1 +; VI-GISEL-NEXT: v_and_b32_e32 v2, 0xfffff000, v0 +; VI-GISEL-NEXT: v_sub_f32_e32 v3, v0, v2 ; VI-GISEL-NEXT: v_mul_f32_e32 v4, 0x3805fdf4, v2 +; VI-GISEL-NEXT: v_mul_f32_e32 v5, 0x3805fdf4, v3 +; VI-GISEL-NEXT: v_mul_f32_e32 v3, 0x3f317000, v3 +; VI-GISEL-NEXT: v_add_f32_e32 v4, v4, v5 ; VI-GISEL-NEXT: v_mul_f32_e32 v2, 0x3f317000, v2 ; VI-GISEL-NEXT: v_add_f32_e32 v3, v3, v4 -; VI-GISEL-NEXT: v_mul_f32_e32 v1, 0x3f317000, v1 ; VI-GISEL-NEXT: v_add_f32_e32 v2, v2, v3 -; VI-GISEL-NEXT: v_add_f32_e32 v1, v1, v2 -; VI-GISEL-NEXT: v_mov_b32_e32 v2, 0x7f800000 -; VI-GISEL-NEXT: v_cmp_lt_f32_e64 s[0:1], |v0|, v2 -; VI-GISEL-NEXT: v_cndmask_b32_e64 v0, v0, v1, s[0:1] +; VI-GISEL-NEXT: v_cmp_lt_f32_e64 s[0:1], |v0|, v1 ; VI-GISEL-NEXT: v_mov_b32_e32 v1, 0x41b17218 +; VI-GISEL-NEXT: v_cndmask_b32_e64 v0, v0, v2, s[0:1] ; VI-GISEL-NEXT: v_cndmask_b32_e32 v1, 0, v1, vcc ; VI-GISEL-NEXT: v_sub_f32_e32 v2, v0, v1 ; VI-GISEL-NEXT: v_mov_b32_e32 v0, s2 @@ -162,25 +162,25 @@ define amdgpu_kernel void @s_log_f32(ptr addrspace(1) %out, float %in) { ; GFX900-GISEL-NEXT: s_load_dword s0, s[4:5], 0x2c ; GFX900-GISEL-NEXT: s_load_dwordx2 s[2:3], s[4:5], 0x24 ; GFX900-GISEL-NEXT: v_mov_b32_e32 v0, 0x800000 -; GFX900-GISEL-NEXT: v_mov_b32_e32 v1, 0x4f800000 -; GFX900-GISEL-NEXT: v_mov_b32_e32 v2, 0x3377d1cf +; GFX900-GISEL-NEXT: v_mov_b32_e32 v2, 0x3f317217 +; GFX900-GISEL-NEXT: v_mov_b32_e32 v3, 0x3377d1cf ; GFX900-GISEL-NEXT: s_waitcnt lgkmcnt(0) ; GFX900-GISEL-NEXT: v_cmp_lt_f32_e32 vcc, s0, v0 
-; GFX900-GISEL-NEXT: v_cndmask_b32_e32 v0, 1.0, v1, vcc -; GFX900-GISEL-NEXT: v_mul_f32_e32 v0, s0, v0 +; GFX900-GISEL-NEXT: v_cndmask_b32_e64 v0, 0, 1, vcc +; GFX900-GISEL-NEXT: v_lshlrev_b32_e32 v0, 5, v0 +; GFX900-GISEL-NEXT: v_ldexp_f32 v0, s0, v0 ; GFX900-GISEL-NEXT: v_log_f32_e32 v0, v0 -; GFX900-GISEL-NEXT: v_mov_b32_e32 v1, 0x3f317217 -; GFX900-GISEL-NEXT: v_mov_b32_e32 v3, 0x7f800000 -; GFX900-GISEL-NEXT: v_mul_f32_e32 v4, 0x3f317217, v0 -; GFX900-GISEL-NEXT: v_fma_f32 v1, v0, v1, -v4 -; GFX900-GISEL-NEXT: v_fma_f32 v1, v0, v2, v1 -; GFX900-GISEL-NEXT: v_add_f32_e32 v1, v4, v1 -; GFX900-GISEL-NEXT: v_cmp_lt_f32_e64 s[0:1], |v0|, v3 -; GFX900-GISEL-NEXT: v_cndmask_b32_e64 v0, v0, v1, s[0:1] -; GFX900-GISEL-NEXT: v_mov_b32_e32 v1, 0x41b17218 -; GFX900-GISEL-NEXT: v_cndmask_b32_e32 v1, 0, v1, vcc -; GFX900-GISEL-NEXT: v_sub_f32_e32 v0, v0, v1 +; GFX900-GISEL-NEXT: v_mov_b32_e32 v4, 0x7f800000 ; GFX900-GISEL-NEXT: v_mov_b32_e32 v1, 0 +; GFX900-GISEL-NEXT: v_mul_f32_e32 v5, 0x3f317217, v0 +; GFX900-GISEL-NEXT: v_fma_f32 v2, v0, v2, -v5 +; GFX900-GISEL-NEXT: v_fma_f32 v2, v0, v3, v2 +; GFX900-GISEL-NEXT: v_add_f32_e32 v2, v5, v2 +; GFX900-GISEL-NEXT: v_cmp_lt_f32_e64 s[0:1], |v0|, v4 +; GFX900-GISEL-NEXT: v_cndmask_b32_e64 v0, v0, v2, s[0:1] +; GFX900-GISEL-NEXT: v_mov_b32_e32 v2, 0x41b17218 +; GFX900-GISEL-NEXT: v_cndmask_b32_e32 v2, 0, v2, vcc +; GFX900-GISEL-NEXT: v_sub_f32_e32 v0, v0, v2 ; GFX900-GISEL-NEXT: global_store_dword v1, v0, s[2:3] ; GFX900-GISEL-NEXT: s_endpgm ; @@ -218,24 +218,26 @@ define amdgpu_kernel void @s_log_f32(ptr addrspace(1) %out, float %in) { ; GFX1100-GISEL-NEXT: s_waitcnt lgkmcnt(0) ; GFX1100-GISEL-NEXT: v_cmp_gt_f32_e64 s2, 0x800000, s0 ; GFX1100-GISEL-NEXT: s_delay_alu instid0(VALU_DEP_1) | instskip(NEXT) | instid1(VALU_DEP_1) -; GFX1100-GISEL-NEXT: v_cndmask_b32_e64 v0, 1.0, 0x4f800000, s2 -; GFX1100-GISEL-NEXT: v_mul_f32_e32 v0, s0, v0 +; GFX1100-GISEL-NEXT: v_cndmask_b32_e64 v0, 0, 1, s2 +; GFX1100-GISEL-NEXT: 
v_lshlrev_b32_e32 v0, 5, v0 +; GFX1100-GISEL-NEXT: s_delay_alu instid0(VALU_DEP_1) | instskip(SKIP_1) | instid1(VALU_DEP_1) +; GFX1100-GISEL-NEXT: v_ldexp_f32 v0, s0, v0 ; GFX1100-GISEL-NEXT: s_load_b64 s[0:1], s[4:5], 0x24 -; GFX1100-GISEL-NEXT: s_delay_alu instid0(VALU_DEP_1) | instskip(SKIP_3) | instid1(VALU_DEP_2) ; GFX1100-GISEL-NEXT: v_log_f32_e32 v0, v0 ; GFX1100-GISEL-NEXT: s_waitcnt_depctr 0xfff ; GFX1100-GISEL-NEXT: v_mul_f32_e32 v1, 0x3f317217, v0 ; GFX1100-GISEL-NEXT: v_cmp_gt_f32_e64 vcc_lo, 0x7f800000, |v0| +; GFX1100-GISEL-NEXT: s_delay_alu instid0(VALU_DEP_2) | instskip(NEXT) | instid1(VALU_DEP_1) ; GFX1100-GISEL-NEXT: v_fma_f32 v2, 0x3f317217, v0, -v1 -; GFX1100-GISEL-NEXT: s_delay_alu instid0(VALU_DEP_1) | instskip(NEXT) | instid1(VALU_DEP_1) ; GFX1100-GISEL-NEXT: v_fmac_f32_e32 v2, 0x3377d1cf, v0 -; GFX1100-GISEL-NEXT: v_add_f32_e32 v1, v1, v2 -; GFX1100-GISEL-NEXT: v_cndmask_b32_e64 v2, 0, 0x41b17218, s2 -; GFX1100-GISEL-NEXT: s_delay_alu instid0(VALU_DEP_2) | instskip(NEXT) | instid1(VALU_DEP_1) -; GFX1100-GISEL-NEXT: v_dual_cndmask_b32 v0, v0, v1 :: v_dual_mov_b32 v1, 0 -; GFX1100-GISEL-NEXT: v_sub_f32_e32 v0, v0, v2 +; GFX1100-GISEL-NEXT: s_delay_alu instid0(VALU_DEP_1) | instskip(NEXT) | instid1(VALU_DEP_1) +; GFX1100-GISEL-NEXT: v_dual_add_f32 v1, v1, v2 :: v_dual_mov_b32 v2, 0 +; GFX1100-GISEL-NEXT: v_cndmask_b32_e32 v0, v0, v1, vcc_lo +; GFX1100-GISEL-NEXT: v_cndmask_b32_e64 v1, 0, 0x41b17218, s2 +; GFX1100-GISEL-NEXT: s_delay_alu instid0(VALU_DEP_1) +; GFX1100-GISEL-NEXT: v_sub_f32_e32 v0, v0, v1 ; GFX1100-GISEL-NEXT: s_waitcnt lgkmcnt(0) -; GFX1100-GISEL-NEXT: global_store_b32 v1, v0, s[0:1] +; GFX1100-GISEL-NEXT: global_store_b32 v2, v0, s[0:1] ; GFX1100-GISEL-NEXT: s_endpgm ; ; R600-LABEL: s_log_f32: @@ -358,35 +360,36 @@ define amdgpu_kernel void @s_log_v2f32(ptr addrspace(1) %out, <2 x float> %in) { ; SI-GISEL: ; %bb.0: ; SI-GISEL-NEXT: s_load_dwordx4 s[4:7], s[4:5], 0x9 ; SI-GISEL-NEXT: v_mov_b32_e32 v0, 0x800000 -; SI-GISEL-NEXT: 
v_mov_b32_e32 v1, 0x4f800000 -; SI-GISEL-NEXT: v_mov_b32_e32 v3, 0x3f317217 -; SI-GISEL-NEXT: v_mov_b32_e32 v4, 0x3377d1cf +; SI-GISEL-NEXT: v_mov_b32_e32 v2, 0x3f317217 +; SI-GISEL-NEXT: v_mov_b32_e32 v3, 0x3377d1cf +; SI-GISEL-NEXT: v_mov_b32_e32 v4, 0x7f800000 ; SI-GISEL-NEXT: s_waitcnt lgkmcnt(0) ; SI-GISEL-NEXT: v_cmp_lt_f32_e32 vcc, s6, v0 -; SI-GISEL-NEXT: v_cndmask_b32_e32 v2, 1.0, v1, vcc -; SI-GISEL-NEXT: v_mul_f32_e32 v2, s6, v2 -; SI-GISEL-NEXT: v_log_f32_e32 v2, v2 -; SI-GISEL-NEXT: v_mov_b32_e32 v5, 0x7f800000 +; SI-GISEL-NEXT: v_cndmask_b32_e64 v1, 0, 1, vcc +; SI-GISEL-NEXT: v_lshlrev_b32_e32 v1, 5, v1 +; SI-GISEL-NEXT: v_ldexp_f32_e32 v1, s6, v1 +; SI-GISEL-NEXT: v_log_f32_e32 v1, v1 ; SI-GISEL-NEXT: s_mov_b32 s6, -1 -; SI-GISEL-NEXT: v_mul_f32_e32 v6, 0x3f317217, v2 -; SI-GISEL-NEXT: v_fma_f32 v7, v2, v3, -v6 -; SI-GISEL-NEXT: v_fma_f32 v7, v2, v4, v7 -; SI-GISEL-NEXT: v_add_f32_e32 v6, v6, v7 -; SI-GISEL-NEXT: v_cmp_lt_f32_e64 s[0:1], |v2|, v5 -; SI-GISEL-NEXT: v_cndmask_b32_e64 v2, v2, v6, s[0:1] +; SI-GISEL-NEXT: v_mul_f32_e32 v5, 0x3f317217, v1 +; SI-GISEL-NEXT: v_fma_f32 v6, v1, v2, -v5 +; SI-GISEL-NEXT: v_fma_f32 v6, v1, v3, v6 +; SI-GISEL-NEXT: v_add_f32_e32 v5, v5, v6 +; SI-GISEL-NEXT: v_cmp_lt_f32_e64 s[0:1], |v1|, v4 +; SI-GISEL-NEXT: v_cndmask_b32_e64 v1, v1, v5, s[0:1] ; SI-GISEL-NEXT: v_cmp_lt_f32_e64 s[0:1], s7, v0 -; SI-GISEL-NEXT: v_cndmask_b32_e64 v0, 1.0, v1, s[0:1] -; SI-GISEL-NEXT: v_mul_f32_e32 v0, s7, v0 -; SI-GISEL-NEXT: v_log_f32_e32 v1, v0 +; SI-GISEL-NEXT: v_cndmask_b32_e64 v0, 0, 1, s[0:1] +; SI-GISEL-NEXT: v_lshlrev_b32_e32 v0, 5, v0 +; SI-GISEL-NEXT: v_ldexp_f32_e32 v0, s7, v0 +; SI-GISEL-NEXT: v_log_f32_e32 v5, v0 ; SI-GISEL-NEXT: v_mov_b32_e32 v6, 0x41b17218 ; SI-GISEL-NEXT: v_cndmask_b32_e32 v0, 0, v6, vcc -; SI-GISEL-NEXT: v_sub_f32_e32 v0, v2, v0 -; SI-GISEL-NEXT: v_mul_f32_e32 v2, 0x3f317217, v1 -; SI-GISEL-NEXT: v_fma_f32 v3, v1, v3, -v2 -; SI-GISEL-NEXT: v_fma_f32 v3, v1, v4, v3 -; SI-GISEL-NEXT: v_add_f32_e32 
v2, v2, v3 -; SI-GISEL-NEXT: v_cmp_lt_f32_e64 vcc, |v1|, v5 -; SI-GISEL-NEXT: v_cndmask_b32_e32 v1, v1, v2, vcc +; SI-GISEL-NEXT: v_sub_f32_e32 v0, v1, v0 +; SI-GISEL-NEXT: v_mul_f32_e32 v1, 0x3f317217, v5 +; SI-GISEL-NEXT: v_fma_f32 v2, v5, v2, -v1 +; SI-GISEL-NEXT: v_fma_f32 v2, v5, v3, v2 +; SI-GISEL-NEXT: v_add_f32_e32 v1, v1, v2 +; SI-GISEL-NEXT: v_cmp_lt_f32_e64 vcc, |v5|, v4 +; SI-GISEL-NEXT: v_cndmask_b32_e32 v1, v5, v1, vcc ; SI-GISEL-NEXT: v_cndmask_b32_e64 v2, 0, v6, s[0:1] ; SI-GISEL-NEXT: v_sub_f32_e32 v1, v1, v2 ; SI-GISEL-NEXT: s_mov_b32 s7, 0xf000 @@ -445,42 +448,43 @@ define amdgpu_kernel void @s_log_v2f32(ptr addrspace(1) %out, <2 x float> %in) { ; VI-GISEL: ; %bb.0: ; VI-GISEL-NEXT: s_load_dwordx4 s[4:7], s[4:5], 0x24 ; VI-GISEL-NEXT: v_mov_b32_e32 v0, 0x800000 -; VI-GISEL-NEXT: v_mov_b32_e32 v1, 0x4f800000 -; VI-GISEL-NEXT: v_mov_b32_e32 v3, 0x7f800000 +; VI-GISEL-NEXT: v_mov_b32_e32 v2, 0x7f800000 ; VI-GISEL-NEXT: s_waitcnt lgkmcnt(0) ; VI-GISEL-NEXT: v_cmp_lt_f32_e32 vcc, s6, v0 -; VI-GISEL-NEXT: v_cndmask_b32_e32 v2, 1.0, v1, vcc -; VI-GISEL-NEXT: v_mul_f32_e32 v2, s6, v2 -; VI-GISEL-NEXT: v_log_f32_e32 v2, v2 -; VI-GISEL-NEXT: v_and_b32_e32 v4, 0xfffff000, v2 -; VI-GISEL-NEXT: v_sub_f32_e32 v5, v2, v4 +; VI-GISEL-NEXT: v_cndmask_b32_e64 v1, 0, 1, vcc +; VI-GISEL-NEXT: v_lshlrev_b32_e32 v1, 5, v1 +; VI-GISEL-NEXT: v_ldexp_f32 v1, s6, v1 +; VI-GISEL-NEXT: v_log_f32_e32 v1, v1 +; VI-GISEL-NEXT: v_and_b32_e32 v3, 0xfffff000, v1 +; VI-GISEL-NEXT: v_sub_f32_e32 v4, v1, v3 +; VI-GISEL-NEXT: v_mul_f32_e32 v5, 0x3805fdf4, v3 ; VI-GISEL-NEXT: v_mul_f32_e32 v6, 0x3805fdf4, v4 -; VI-GISEL-NEXT: v_mul_f32_e32 v7, 0x3805fdf4, v5 -; VI-GISEL-NEXT: v_mul_f32_e32 v5, 0x3f317000, v5 -; VI-GISEL-NEXT: v_add_f32_e32 v6, v6, v7 ; VI-GISEL-NEXT: v_mul_f32_e32 v4, 0x3f317000, v4 ; VI-GISEL-NEXT: v_add_f32_e32 v5, v5, v6 +; VI-GISEL-NEXT: v_mul_f32_e32 v3, 0x3f317000, v3 ; VI-GISEL-NEXT: v_add_f32_e32 v4, v4, v5 -; VI-GISEL-NEXT: v_cmp_lt_f32_e64 s[0:1], |v2|, v3 
-; VI-GISEL-NEXT: v_cndmask_b32_e64 v2, v2, v4, s[0:1] +; VI-GISEL-NEXT: v_add_f32_e32 v3, v3, v4 +; VI-GISEL-NEXT: v_cmp_lt_f32_e64 s[0:1], |v1|, v2 +; VI-GISEL-NEXT: v_cndmask_b32_e64 v1, v1, v3, s[0:1] ; VI-GISEL-NEXT: v_cmp_lt_f32_e64 s[0:1], s7, v0 -; VI-GISEL-NEXT: v_cndmask_b32_e64 v0, 1.0, v1, s[0:1] -; VI-GISEL-NEXT: v_mul_f32_e32 v0, s7, v0 -; VI-GISEL-NEXT: v_log_f32_e32 v1, v0 +; VI-GISEL-NEXT: v_cndmask_b32_e64 v0, 0, 1, s[0:1] +; VI-GISEL-NEXT: v_lshlrev_b32_e32 v0, 5, v0 +; VI-GISEL-NEXT: v_ldexp_f32 v0, s7, v0 +; VI-GISEL-NEXT: v_log_f32_e32 v3, v0 ; VI-GISEL-NEXT: v_mov_b32_e32 v4, 0x41b17218 ; VI-GISEL-NEXT: v_cndmask_b32_e32 v0, 0, v4, vcc -; VI-GISEL-NEXT: v_sub_f32_e32 v0, v2, v0 -; VI-GISEL-NEXT: v_and_b32_e32 v2, 0xfffff000, v1 -; VI-GISEL-NEXT: v_sub_f32_e32 v5, v1, v2 +; VI-GISEL-NEXT: v_sub_f32_e32 v0, v1, v0 +; VI-GISEL-NEXT: v_and_b32_e32 v1, 0xfffff000, v3 +; VI-GISEL-NEXT: v_sub_f32_e32 v5, v3, v1 ; VI-GISEL-NEXT: v_mul_f32_e32 v6, 0x3805fdf4, v5 -; VI-GISEL-NEXT: v_mul_f32_e32 v7, 0x3805fdf4, v2 +; VI-GISEL-NEXT: v_mul_f32_e32 v7, 0x3805fdf4, v1 ; VI-GISEL-NEXT: v_add_f32_e32 v6, v7, v6 ; VI-GISEL-NEXT: v_mul_f32_e32 v5, 0x3f317000, v5 ; VI-GISEL-NEXT: v_add_f32_e32 v5, v5, v6 -; VI-GISEL-NEXT: v_mul_f32_e32 v2, 0x3f317000, v2 -; VI-GISEL-NEXT: v_add_f32_e32 v2, v2, v5 -; VI-GISEL-NEXT: v_cmp_lt_f32_e64 vcc, |v1|, v3 -; VI-GISEL-NEXT: v_cndmask_b32_e32 v1, v1, v2, vcc +; VI-GISEL-NEXT: v_mul_f32_e32 v1, 0x3f317000, v1 +; VI-GISEL-NEXT: v_add_f32_e32 v1, v1, v5 +; VI-GISEL-NEXT: v_cmp_lt_f32_e64 vcc, |v3|, v2 +; VI-GISEL-NEXT: v_cndmask_b32_e32 v1, v3, v1, vcc ; VI-GISEL-NEXT: v_cndmask_b32_e64 v2, 0, v4, s[0:1] ; VI-GISEL-NEXT: v_sub_f32_e32 v1, v1, v2 ; VI-GISEL-NEXT: v_mov_b32_e32 v2, s4 @@ -531,37 +535,38 @@ define amdgpu_kernel void @s_log_v2f32(ptr addrspace(1) %out, <2 x float> %in) { ; GFX900-GISEL: ; %bb.0: ; GFX900-GISEL-NEXT: s_load_dwordx4 s[8:11], s[4:5], 0x24 ; GFX900-GISEL-NEXT: v_mov_b32_e32 v0, 0x800000 -; 
GFX900-GISEL-NEXT: v_mov_b32_e32 v1, 0x4f800000 ; GFX900-GISEL-NEXT: v_mov_b32_e32 v3, 0x3f317217 ; GFX900-GISEL-NEXT: v_mov_b32_e32 v4, 0x3377d1cf +; GFX900-GISEL-NEXT: v_mov_b32_e32 v5, 0x7f800000 ; GFX900-GISEL-NEXT: s_waitcnt lgkmcnt(0) ; GFX900-GISEL-NEXT: v_cmp_lt_f32_e32 vcc, s10, v0 -; GFX900-GISEL-NEXT: v_cndmask_b32_e32 v2, 1.0, v1, vcc -; GFX900-GISEL-NEXT: v_mul_f32_e32 v2, s10, v2 -; GFX900-GISEL-NEXT: v_log_f32_e32 v2, v2 -; GFX900-GISEL-NEXT: v_mov_b32_e32 v5, 0x7f800000 -; GFX900-GISEL-NEXT: v_mul_f32_e32 v6, 0x3f317217, v2 -; GFX900-GISEL-NEXT: v_fma_f32 v7, v2, v3, -v6 -; GFX900-GISEL-NEXT: v_fma_f32 v7, v2, v4, v7 +; GFX900-GISEL-NEXT: v_cndmask_b32_e64 v1, 0, 1, vcc +; GFX900-GISEL-NEXT: v_lshlrev_b32_e32 v1, 5, v1 +; GFX900-GISEL-NEXT: v_ldexp_f32 v1, s10, v1 +; GFX900-GISEL-NEXT: v_log_f32_e32 v1, v1 +; GFX900-GISEL-NEXT: v_mov_b32_e32 v2, 0 +; GFX900-GISEL-NEXT: v_mul_f32_e32 v6, 0x3f317217, v1 +; GFX900-GISEL-NEXT: v_fma_f32 v7, v1, v3, -v6 +; GFX900-GISEL-NEXT: v_fma_f32 v7, v1, v4, v7 ; GFX900-GISEL-NEXT: v_add_f32_e32 v6, v6, v7 -; GFX900-GISEL-NEXT: v_cmp_lt_f32_e64 s[0:1], |v2|, v5 -; GFX900-GISEL-NEXT: v_cndmask_b32_e64 v2, v2, v6, s[0:1] +; GFX900-GISEL-NEXT: v_cmp_lt_f32_e64 s[0:1], |v1|, v5 +; GFX900-GISEL-NEXT: v_cndmask_b32_e64 v1, v1, v6, s[0:1] ; GFX900-GISEL-NEXT: v_cmp_lt_f32_e64 s[0:1], s11, v0 -; GFX900-GISEL-NEXT: v_cndmask_b32_e64 v0, 1.0, v1, s[0:1] -; GFX900-GISEL-NEXT: v_mul_f32_e32 v0, s11, v0 -; GFX900-GISEL-NEXT: v_log_f32_e32 v1, v0 -; GFX900-GISEL-NEXT: v_mov_b32_e32 v6, 0x41b17218 -; GFX900-GISEL-NEXT: v_cndmask_b32_e32 v0, 0, v6, vcc -; GFX900-GISEL-NEXT: v_sub_f32_e32 v0, v2, v0 -; GFX900-GISEL-NEXT: v_mul_f32_e32 v2, 0x3f317217, v1 -; GFX900-GISEL-NEXT: v_fma_f32 v3, v1, v3, -v2 -; GFX900-GISEL-NEXT: v_fma_f32 v3, v1, v4, v3 -; GFX900-GISEL-NEXT: v_add_f32_e32 v2, v2, v3 -; GFX900-GISEL-NEXT: v_cmp_lt_f32_e64 vcc, |v1|, v5 -; GFX900-GISEL-NEXT: v_cndmask_b32_e32 v1, v1, v2, vcc -; GFX900-GISEL-NEXT: 
v_cndmask_b32_e64 v2, 0, v6, s[0:1] -; GFX900-GISEL-NEXT: v_sub_f32_e32 v1, v1, v2 -; GFX900-GISEL-NEXT: v_mov_b32_e32 v2, 0 +; GFX900-GISEL-NEXT: v_cndmask_b32_e64 v0, 0, 1, s[0:1] +; GFX900-GISEL-NEXT: v_lshlrev_b32_e32 v0, 5, v0 +; GFX900-GISEL-NEXT: v_ldexp_f32 v0, s11, v0 +; GFX900-GISEL-NEXT: v_log_f32_e32 v6, v0 +; GFX900-GISEL-NEXT: v_mov_b32_e32 v7, 0x41b17218 +; GFX900-GISEL-NEXT: v_cndmask_b32_e32 v0, 0, v7, vcc +; GFX900-GISEL-NEXT: v_sub_f32_e32 v0, v1, v0 +; GFX900-GISEL-NEXT: v_mul_f32_e32 v1, 0x3f317217, v6 +; GFX900-GISEL-NEXT: v_fma_f32 v3, v6, v3, -v1 +; GFX900-GISEL-NEXT: v_fma_f32 v3, v6, v4, v3 +; GFX900-GISEL-NEXT: v_add_f32_e32 v1, v1, v3 +; GFX900-GISEL-NEXT: v_cmp_lt_f32_e64 vcc, |v6|, v5 +; GFX900-GISEL-NEXT: v_cndmask_b32_e32 v1, v6, v1, vcc +; GFX900-GISEL-NEXT: v_cndmask_b32_e64 v3, 0, v7, s[0:1] +; GFX900-GISEL-NEXT: v_sub_f32_e32 v1, v1, v3 ; GFX900-GISEL-NEXT: global_store_dwordx2 v2, v[0:1], s[8:9] ; GFX900-GISEL-NEXT: s_endpgm ; @@ -608,31 +613,37 @@ define amdgpu_kernel void @s_log_v2f32(ptr addrspace(1) %out, <2 x float> %in) { ; GFX1100-GISEL: ; %bb.0: ; GFX1100-GISEL-NEXT: s_load_b128 s[0:3], s[4:5], 0x24 ; GFX1100-GISEL-NEXT: s_waitcnt lgkmcnt(0) -; GFX1100-GISEL-NEXT: v_cmp_gt_f32_e64 s4, 0x800000, s2 ; GFX1100-GISEL-NEXT: v_cmp_gt_f32_e64 s5, 0x800000, s3 +; GFX1100-GISEL-NEXT: v_cmp_gt_f32_e64 s4, 0x800000, s2 ; GFX1100-GISEL-NEXT: s_delay_alu instid0(VALU_DEP_2) | instskip(NEXT) | instid1(VALU_DEP_2) -; GFX1100-GISEL-NEXT: v_cndmask_b32_e64 v0, 1.0, 0x4f800000, s4 -; GFX1100-GISEL-NEXT: v_cndmask_b32_e64 v1, 1.0, 0x4f800000, s5 -; GFX1100-GISEL-NEXT: s_delay_alu instid0(VALU_DEP_1) | instskip(NEXT) | instid1(VALU_DEP_1) -; GFX1100-GISEL-NEXT: v_dual_mul_f32 v0, s2, v0 :: v_dual_mul_f32 v1, s3, v1 -; GFX1100-GISEL-NEXT: v_log_f32_e32 v0, v0 -; GFX1100-GISEL-NEXT: s_delay_alu instid0(VALU_DEP_1) | instskip(SKIP_3) | instid1(VALU_DEP_2) +; GFX1100-GISEL-NEXT: v_cndmask_b32_e64 v1, 0, 1, s5 +; GFX1100-GISEL-NEXT: 
v_cndmask_b32_e64 v0, 0, 1, s4 +; GFX1100-GISEL-NEXT: s_delay_alu instid0(VALU_DEP_2) | instskip(NEXT) | instid1(VALU_DEP_1) +; GFX1100-GISEL-NEXT: v_lshlrev_b32_e32 v1, 5, v1 +; GFX1100-GISEL-NEXT: v_ldexp_f32 v1, s3, v1 +; GFX1100-GISEL-NEXT: s_delay_alu instid0(VALU_DEP_1) | instskip(SKIP_2) | instid1(VALU_DEP_1) ; GFX1100-GISEL-NEXT: v_log_f32_e32 v1, v1 ; GFX1100-GISEL-NEXT: s_waitcnt_depctr 0xfff -; GFX1100-GISEL-NEXT: v_dual_mul_f32 v2, 0x3f317217, v0 :: v_dual_mul_f32 v3, 0x3f317217, v1 +; GFX1100-GISEL-NEXT: v_dual_mul_f32 v3, 0x3f317217, v1 :: v_dual_lshlrev_b32 v0, 5, v0 +; GFX1100-GISEL-NEXT: v_ldexp_f32 v0, s2, v0 +; GFX1100-GISEL-NEXT: s_delay_alu instid0(VALU_DEP_2) | instskip(NEXT) | instid1(VALU_DEP_2) +; GFX1100-GISEL-NEXT: v_fma_f32 v5, 0x3f317217, v1, -v3 +; GFX1100-GISEL-NEXT: v_log_f32_e32 v0, v0 +; GFX1100-GISEL-NEXT: s_delay_alu instid0(VALU_DEP_1) | instskip(SKIP_4) | instid1(VALU_DEP_3) +; GFX1100-GISEL-NEXT: v_fmac_f32_e32 v5, 0x3377d1cf, v1 +; GFX1100-GISEL-NEXT: s_waitcnt_depctr 0xfff +; GFX1100-GISEL-NEXT: v_dual_add_f32 v3, v3, v5 :: v_dual_mul_f32 v2, 0x3f317217, v0 ; GFX1100-GISEL-NEXT: v_cmp_gt_f32_e64 vcc_lo, 0x7f800000, |v0| +; GFX1100-GISEL-NEXT: v_cndmask_b32_e64 v5, 0, 0x41b17218, s5 ; GFX1100-GISEL-NEXT: v_fma_f32 v4, 0x3f317217, v0, -v2 -; GFX1100-GISEL-NEXT: s_delay_alu instid0(VALU_DEP_3) | instskip(NEXT) | instid1(VALU_DEP_1) -; GFX1100-GISEL-NEXT: v_fma_f32 v5, 0x3f317217, v1, -v3 -; GFX1100-GISEL-NEXT: v_dual_fmac_f32 v4, 0x3377d1cf, v0 :: v_dual_fmac_f32 v5, 0x3377d1cf, v1 -; GFX1100-GISEL-NEXT: s_delay_alu instid0(VALU_DEP_1) | instskip(SKIP_2) | instid1(VALU_DEP_3) -; GFX1100-GISEL-NEXT: v_dual_add_f32 v2, v2, v4 :: v_dual_add_f32 v3, v3, v5 +; GFX1100-GISEL-NEXT: s_delay_alu instid0(VALU_DEP_1) | instskip(NEXT) | instid1(VALU_DEP_1) +; GFX1100-GISEL-NEXT: v_fmac_f32_e32 v4, 0x3377d1cf, v0 +; GFX1100-GISEL-NEXT: v_add_f32_e32 v2, v2, v4 ; GFX1100-GISEL-NEXT: v_cndmask_b32_e64 v4, 0, 0x41b17218, s4 -; 
GFX1100-GISEL-NEXT: v_cndmask_b32_e64 v5, 0, 0x41b17218, s5 +; GFX1100-GISEL-NEXT: s_delay_alu instid0(VALU_DEP_2) | instskip(SKIP_2) | instid1(VALU_DEP_1) ; GFX1100-GISEL-NEXT: v_cndmask_b32_e32 v0, v0, v2, vcc_lo ; GFX1100-GISEL-NEXT: v_cmp_gt_f32_e64 vcc_lo, 0x7f800000, |v1| -; GFX1100-GISEL-NEXT: v_dual_mov_b32 v2, 0 :: v_dual_cndmask_b32 v1, v1, v3 -; GFX1100-GISEL-NEXT: s_delay_alu instid0(VALU_DEP_1) +; GFX1100-GISEL-NEXT: v_dual_cndmask_b32 v1, v1, v3 :: v_dual_mov_b32 v2, 0 ; GFX1100-GISEL-NEXT: v_dual_sub_f32 v0, v0, v4 :: v_dual_sub_f32 v1, v1, v5 ; GFX1100-GISEL-NEXT: global_store_b64 v2, v[0:1], s[0:1] ; GFX1100-GISEL-NEXT: s_endpgm @@ -808,49 +819,51 @@ define amdgpu_kernel void @s_log_v3f32(ptr addrspace(1) %out, <3 x float> %in) { ; SI-GISEL-NEXT: s_load_dwordx4 s[8:11], s[4:5], 0xd ; SI-GISEL-NEXT: s_load_dwordx2 s[4:5], s[4:5], 0x9 ; SI-GISEL-NEXT: v_mov_b32_e32 v1, 0x800000 -; SI-GISEL-NEXT: v_mov_b32_e32 v2, 0x4f800000 -; SI-GISEL-NEXT: v_mov_b32_e32 v3, 0x3f317217 +; SI-GISEL-NEXT: v_mov_b32_e32 v2, 0x3f317217 +; SI-GISEL-NEXT: v_mov_b32_e32 v3, 0x3377d1cf ; SI-GISEL-NEXT: s_waitcnt lgkmcnt(0) ; SI-GISEL-NEXT: v_cmp_lt_f32_e32 vcc, s8, v1 -; SI-GISEL-NEXT: v_cndmask_b32_e32 v0, 1.0, v2, vcc -; SI-GISEL-NEXT: v_mul_f32_e32 v0, s8, v0 +; SI-GISEL-NEXT: v_cndmask_b32_e64 v0, 0, 1, vcc +; SI-GISEL-NEXT: v_lshlrev_b32_e32 v0, 5, v0 +; SI-GISEL-NEXT: v_ldexp_f32_e32 v0, s8, v0 ; SI-GISEL-NEXT: v_log_f32_e32 v0, v0 -; SI-GISEL-NEXT: v_mov_b32_e32 v4, 0x3377d1cf -; SI-GISEL-NEXT: v_mov_b32_e32 v5, 0x7f800000 +; SI-GISEL-NEXT: v_mov_b32_e32 v4, 0x7f800000 ; SI-GISEL-NEXT: s_mov_b32 s6, -1 -; SI-GISEL-NEXT: v_mul_f32_e32 v6, 0x3f317217, v0 -; SI-GISEL-NEXT: v_fma_f32 v7, v0, v3, -v6 -; SI-GISEL-NEXT: v_fma_f32 v7, v0, v4, v7 -; SI-GISEL-NEXT: v_add_f32_e32 v6, v6, v7 -; SI-GISEL-NEXT: v_cmp_lt_f32_e64 s[0:1], |v0|, v5 -; SI-GISEL-NEXT: v_cndmask_b32_e64 v0, v0, v6, s[0:1] +; SI-GISEL-NEXT: s_mov_b32 s7, 0xf000 +; SI-GISEL-NEXT: v_mul_f32_e32 v5, 
0x3f317217, v0 +; SI-GISEL-NEXT: v_fma_f32 v6, v0, v2, -v5 +; SI-GISEL-NEXT: v_fma_f32 v6, v0, v3, v6 +; SI-GISEL-NEXT: v_add_f32_e32 v5, v5, v6 +; SI-GISEL-NEXT: v_cmp_lt_f32_e64 s[0:1], |v0|, v4 +; SI-GISEL-NEXT: v_cndmask_b32_e64 v0, v0, v5, s[0:1] ; SI-GISEL-NEXT: v_cmp_lt_f32_e64 s[0:1], s9, v1 -; SI-GISEL-NEXT: v_cndmask_b32_e64 v6, 1.0, v2, s[0:1] -; SI-GISEL-NEXT: v_mul_f32_e32 v6, s9, v6 -; SI-GISEL-NEXT: v_log_f32_e32 v6, v6 -; SI-GISEL-NEXT: v_mov_b32_e32 v7, 0x41b17218 -; SI-GISEL-NEXT: v_cndmask_b32_e32 v8, 0, v7, vcc +; SI-GISEL-NEXT: v_cndmask_b32_e64 v5, 0, 1, s[0:1] +; SI-GISEL-NEXT: v_lshlrev_b32_e32 v5, 5, v5 +; SI-GISEL-NEXT: v_ldexp_f32_e32 v5, s9, v5 +; SI-GISEL-NEXT: v_log_f32_e32 v5, v5 +; SI-GISEL-NEXT: v_mov_b32_e32 v6, 0x41b17218 +; SI-GISEL-NEXT: v_cndmask_b32_e32 v7, 0, v6, vcc ; SI-GISEL-NEXT: v_cmp_lt_f32_e32 vcc, s10, v1 -; SI-GISEL-NEXT: v_cndmask_b32_e32 v1, 1.0, v2, vcc -; SI-GISEL-NEXT: v_sub_f32_e32 v0, v0, v8 -; SI-GISEL-NEXT: v_mul_f32_e32 v8, 0x3f317217, v6 -; SI-GISEL-NEXT: v_mul_f32_e32 v1, s10, v1 -; SI-GISEL-NEXT: v_fma_f32 v9, v6, v3, -v8 -; SI-GISEL-NEXT: v_log_f32_e32 v2, v1 -; SI-GISEL-NEXT: v_fma_f32 v9, v6, v4, v9 -; SI-GISEL-NEXT: v_add_f32_e32 v8, v8, v9 -; SI-GISEL-NEXT: v_cmp_lt_f32_e64 s[2:3], |v6|, v5 -; SI-GISEL-NEXT: v_cndmask_b32_e64 v1, v6, v8, s[2:3] -; SI-GISEL-NEXT: v_cndmask_b32_e64 v6, 0, v7, s[0:1] -; SI-GISEL-NEXT: v_sub_f32_e32 v1, v1, v6 -; SI-GISEL-NEXT: v_mul_f32_e32 v6, 0x3f317217, v2 -; SI-GISEL-NEXT: v_fma_f32 v3, v2, v3, -v6 -; SI-GISEL-NEXT: v_fma_f32 v3, v2, v4, v3 -; SI-GISEL-NEXT: v_add_f32_e32 v3, v6, v3 -; SI-GISEL-NEXT: v_cmp_lt_f32_e64 s[0:1], |v2|, v5 -; SI-GISEL-NEXT: v_cndmask_b32_e64 v2, v2, v3, s[0:1] -; SI-GISEL-NEXT: v_cndmask_b32_e32 v3, 0, v7, vcc -; SI-GISEL-NEXT: s_mov_b32 s7, 0xf000 +; SI-GISEL-NEXT: v_sub_f32_e32 v0, v0, v7 +; SI-GISEL-NEXT: v_mul_f32_e32 v7, 0x3f317217, v5 +; SI-GISEL-NEXT: v_cndmask_b32_e64 v1, 0, 1, vcc +; SI-GISEL-NEXT: v_fma_f32 v8, v5, v2, -v7 +; 
SI-GISEL-NEXT: v_lshlrev_b32_e32 v1, 5, v1 +; SI-GISEL-NEXT: v_fma_f32 v8, v5, v3, v8 +; SI-GISEL-NEXT: v_ldexp_f32_e32 v1, s10, v1 +; SI-GISEL-NEXT: v_add_f32_e32 v7, v7, v8 +; SI-GISEL-NEXT: v_log_f32_e32 v8, v1 +; SI-GISEL-NEXT: v_cmp_lt_f32_e64 s[2:3], |v5|, v4 +; SI-GISEL-NEXT: v_cndmask_b32_e64 v1, v5, v7, s[2:3] +; SI-GISEL-NEXT: v_cndmask_b32_e64 v5, 0, v6, s[0:1] +; SI-GISEL-NEXT: v_sub_f32_e32 v1, v1, v5 +; SI-GISEL-NEXT: v_mul_f32_e32 v5, 0x3f317217, v8 +; SI-GISEL-NEXT: v_fma_f32 v2, v8, v2, -v5 +; SI-GISEL-NEXT: v_fma_f32 v2, v8, v3, v2 +; SI-GISEL-NEXT: v_add_f32_e32 v2, v5, v2 +; SI-GISEL-NEXT: v_cmp_lt_f32_e64 s[0:1], |v8|, v4 +; SI-GISEL-NEXT: v_cndmask_b32_e64 v2, v8, v2, s[0:1] +; SI-GISEL-NEXT: v_cndmask_b32_e32 v3, 0, v6, vcc ; SI-GISEL-NEXT: v_sub_f32_e32 v2, v2, v3 ; SI-GISEL-NEXT: buffer_store_dwordx2 v[0:1], off, s[4:7], 0 ; SI-GISEL-NEXT: buffer_store_dword v2, off, s[4:7], 0 offset:8 @@ -927,12 +940,13 @@ define amdgpu_kernel void @s_log_v3f32(ptr addrspace(1) %out, <3 x float> %in) { ; VI-GISEL: ; %bb.0: ; VI-GISEL-NEXT: s_load_dwordx4 s[8:11], s[4:5], 0x34 ; VI-GISEL-NEXT: v_mov_b32_e32 v1, 0x800000 -; VI-GISEL-NEXT: v_mov_b32_e32 v2, 0x4f800000 +; VI-GISEL-NEXT: v_mov_b32_e32 v2, 0x7f800000 ; VI-GISEL-NEXT: s_load_dwordx2 s[4:5], s[4:5], 0x24 ; VI-GISEL-NEXT: s_waitcnt lgkmcnt(0) ; VI-GISEL-NEXT: v_cmp_lt_f32_e32 vcc, s8, v1 -; VI-GISEL-NEXT: v_cndmask_b32_e32 v0, 1.0, v2, vcc -; VI-GISEL-NEXT: v_mul_f32_e32 v0, s8, v0 +; VI-GISEL-NEXT: v_cndmask_b32_e64 v0, 0, 1, vcc +; VI-GISEL-NEXT: v_lshlrev_b32_e32 v0, 5, v0 +; VI-GISEL-NEXT: v_ldexp_f32 v0, s8, v0 ; VI-GISEL-NEXT: v_log_f32_e32 v0, v0 ; VI-GISEL-NEXT: v_and_b32_e32 v3, 0xfffff000, v0 ; VI-GISEL-NEXT: v_sub_f32_e32 v4, v0, v3 @@ -943,45 +957,46 @@ define amdgpu_kernel void @s_log_v3f32(ptr addrspace(1) %out, <3 x float> %in) { ; VI-GISEL-NEXT: v_mul_f32_e32 v3, 0x3f317000, v3 ; VI-GISEL-NEXT: v_add_f32_e32 v4, v4, v5 ; VI-GISEL-NEXT: v_add_f32_e32 v3, v3, v4 -; VI-GISEL-NEXT: 
v_mov_b32_e32 v4, 0x7f800000 -; VI-GISEL-NEXT: v_cmp_lt_f32_e64 s[0:1], |v0|, v4 +; VI-GISEL-NEXT: v_cmp_lt_f32_e64 s[0:1], |v0|, v2 ; VI-GISEL-NEXT: v_cndmask_b32_e64 v0, v0, v3, s[0:1] ; VI-GISEL-NEXT: v_cmp_lt_f32_e64 s[0:1], s9, v1 -; VI-GISEL-NEXT: v_cndmask_b32_e64 v3, 1.0, v2, s[0:1] -; VI-GISEL-NEXT: v_mul_f32_e32 v3, s9, v3 +; VI-GISEL-NEXT: v_cndmask_b32_e64 v3, 0, 1, s[0:1] +; VI-GISEL-NEXT: v_lshlrev_b32_e32 v3, 5, v3 +; VI-GISEL-NEXT: v_ldexp_f32 v3, s9, v3 ; VI-GISEL-NEXT: v_log_f32_e32 v3, v3 -; VI-GISEL-NEXT: v_mov_b32_e32 v5, 0x41b17218 -; VI-GISEL-NEXT: v_cndmask_b32_e32 v6, 0, v5, vcc -; VI-GISEL-NEXT: v_sub_f32_e32 v0, v0, v6 -; VI-GISEL-NEXT: v_and_b32_e32 v6, 0xfffff000, v3 +; VI-GISEL-NEXT: v_mov_b32_e32 v4, 0x41b17218 +; VI-GISEL-NEXT: v_cndmask_b32_e32 v5, 0, v4, vcc +; VI-GISEL-NEXT: v_sub_f32_e32 v0, v0, v5 +; VI-GISEL-NEXT: v_and_b32_e32 v5, 0xfffff000, v3 +; VI-GISEL-NEXT: v_sub_f32_e32 v6, v3, v5 ; VI-GISEL-NEXT: v_cmp_lt_f32_e32 vcc, s10, v1 -; VI-GISEL-NEXT: v_sub_f32_e32 v7, v3, v6 -; VI-GISEL-NEXT: v_cndmask_b32_e32 v1, 1.0, v2, vcc -; VI-GISEL-NEXT: v_mul_f32_e32 v8, 0x3805fdf4, v7 -; VI-GISEL-NEXT: v_mul_f32_e32 v9, 0x3805fdf4, v6 -; VI-GISEL-NEXT: v_mul_f32_e32 v1, s10, v1 -; VI-GISEL-NEXT: v_add_f32_e32 v8, v9, v8 -; VI-GISEL-NEXT: v_mul_f32_e32 v7, 0x3f317000, v7 -; VI-GISEL-NEXT: v_log_f32_e32 v2, v1 -; VI-GISEL-NEXT: v_add_f32_e32 v7, v7, v8 +; VI-GISEL-NEXT: v_mul_f32_e32 v7, 0x3805fdf4, v6 +; VI-GISEL-NEXT: v_mul_f32_e32 v8, 0x3805fdf4, v5 +; VI-GISEL-NEXT: v_cndmask_b32_e64 v1, 0, 1, vcc +; VI-GISEL-NEXT: v_add_f32_e32 v7, v8, v7 ; VI-GISEL-NEXT: v_mul_f32_e32 v6, 0x3f317000, v6 +; VI-GISEL-NEXT: v_lshlrev_b32_e32 v1, 5, v1 ; VI-GISEL-NEXT: v_add_f32_e32 v6, v6, v7 -; VI-GISEL-NEXT: v_cmp_lt_f32_e64 s[2:3], |v3|, v4 -; VI-GISEL-NEXT: v_cndmask_b32_e64 v1, v3, v6, s[2:3] -; VI-GISEL-NEXT: v_cndmask_b32_e64 v3, 0, v5, s[0:1] +; VI-GISEL-NEXT: v_mul_f32_e32 v5, 0x3f317000, v5 +; VI-GISEL-NEXT: v_ldexp_f32 v1, s10, v1 +; 
VI-GISEL-NEXT: v_add_f32_e32 v5, v5, v6 +; VI-GISEL-NEXT: v_log_f32_e32 v6, v1 +; VI-GISEL-NEXT: v_cmp_lt_f32_e64 s[2:3], |v3|, v2 +; VI-GISEL-NEXT: v_cndmask_b32_e64 v1, v3, v5, s[2:3] +; VI-GISEL-NEXT: v_cndmask_b32_e64 v3, 0, v4, s[0:1] ; VI-GISEL-NEXT: v_sub_f32_e32 v1, v1, v3 -; VI-GISEL-NEXT: v_and_b32_e32 v3, 0xfffff000, v2 -; VI-GISEL-NEXT: v_sub_f32_e32 v6, v2, v3 -; VI-GISEL-NEXT: v_mul_f32_e32 v7, 0x3805fdf4, v6 +; VI-GISEL-NEXT: v_and_b32_e32 v3, 0xfffff000, v6 +; VI-GISEL-NEXT: v_sub_f32_e32 v5, v6, v3 +; VI-GISEL-NEXT: v_mul_f32_e32 v7, 0x3805fdf4, v5 ; VI-GISEL-NEXT: v_mul_f32_e32 v8, 0x3805fdf4, v3 ; VI-GISEL-NEXT: v_add_f32_e32 v7, v8, v7 -; VI-GISEL-NEXT: v_mul_f32_e32 v6, 0x3f317000, v6 -; VI-GISEL-NEXT: v_add_f32_e32 v6, v6, v7 +; VI-GISEL-NEXT: v_mul_f32_e32 v5, 0x3f317000, v5 +; VI-GISEL-NEXT: v_add_f32_e32 v5, v5, v7 ; VI-GISEL-NEXT: v_mul_f32_e32 v3, 0x3f317000, v3 -; VI-GISEL-NEXT: v_add_f32_e32 v3, v3, v6 -; VI-GISEL-NEXT: v_cmp_lt_f32_e64 s[0:1], |v2|, v4 -; VI-GISEL-NEXT: v_cndmask_b32_e64 v2, v2, v3, s[0:1] -; VI-GISEL-NEXT: v_cndmask_b32_e32 v3, 0, v5, vcc +; VI-GISEL-NEXT: v_add_f32_e32 v3, v3, v5 +; VI-GISEL-NEXT: v_cmp_lt_f32_e64 s[0:1], |v6|, v2 +; VI-GISEL-NEXT: v_cndmask_b32_e64 v2, v6, v3, s[0:1] +; VI-GISEL-NEXT: v_cndmask_b32_e32 v3, 0, v4, vcc ; VI-GISEL-NEXT: v_sub_f32_e32 v2, v2, v3 ; VI-GISEL-NEXT: v_mov_b32_e32 v3, s4 ; VI-GISEL-NEXT: v_mov_b32_e32 v4, s5 @@ -1046,49 +1061,51 @@ define amdgpu_kernel void @s_log_v3f32(ptr addrspace(1) %out, <3 x float> %in) { ; GFX900-GISEL-NEXT: s_load_dwordx4 s[8:11], s[4:5], 0x34 ; GFX900-GISEL-NEXT: s_load_dwordx2 s[6:7], s[4:5], 0x24 ; GFX900-GISEL-NEXT: v_mov_b32_e32 v1, 0x800000 -; GFX900-GISEL-NEXT: v_mov_b32_e32 v2, 0x4f800000 -; GFX900-GISEL-NEXT: v_mov_b32_e32 v3, 0x3f317217 +; GFX900-GISEL-NEXT: v_mov_b32_e32 v2, 0x3f317217 +; GFX900-GISEL-NEXT: v_mov_b32_e32 v4, 0x3377d1cf ; GFX900-GISEL-NEXT: s_waitcnt lgkmcnt(0) ; GFX900-GISEL-NEXT: v_cmp_lt_f32_e32 vcc, s8, v1 -; 
GFX900-GISEL-NEXT: v_cndmask_b32_e32 v0, 1.0, v2, vcc -; GFX900-GISEL-NEXT: v_mul_f32_e32 v0, s8, v0 +; GFX900-GISEL-NEXT: v_cndmask_b32_e64 v0, 0, 1, vcc +; GFX900-GISEL-NEXT: v_lshlrev_b32_e32 v0, 5, v0 +; GFX900-GISEL-NEXT: v_ldexp_f32 v0, s8, v0 ; GFX900-GISEL-NEXT: v_log_f32_e32 v0, v0 -; GFX900-GISEL-NEXT: v_mov_b32_e32 v4, 0x3377d1cf ; GFX900-GISEL-NEXT: v_mov_b32_e32 v5, 0x7f800000 +; GFX900-GISEL-NEXT: v_mov_b32_e32 v3, 0 ; GFX900-GISEL-NEXT: v_mul_f32_e32 v6, 0x3f317217, v0 -; GFX900-GISEL-NEXT: v_fma_f32 v7, v0, v3, -v6 +; GFX900-GISEL-NEXT: v_fma_f32 v7, v0, v2, -v6 ; GFX900-GISEL-NEXT: v_fma_f32 v7, v0, v4, v7 ; GFX900-GISEL-NEXT: v_add_f32_e32 v6, v6, v7 ; GFX900-GISEL-NEXT: v_cmp_lt_f32_e64 s[0:1], |v0|, v5 ; GFX900-GISEL-NEXT: v_cndmask_b32_e64 v0, v0, v6, s[0:1] ; GFX900-GISEL-NEXT: v_cmp_lt_f32_e64 s[0:1], s9, v1 -; GFX900-GISEL-NEXT: v_cndmask_b32_e64 v6, 1.0, v2, s[0:1] -; GFX900-GISEL-NEXT: v_mul_f32_e32 v6, s9, v6 +; GFX900-GISEL-NEXT: v_cndmask_b32_e64 v6, 0, 1, s[0:1] +; GFX900-GISEL-NEXT: v_lshlrev_b32_e32 v6, 5, v6 +; GFX900-GISEL-NEXT: v_ldexp_f32 v6, s9, v6 ; GFX900-GISEL-NEXT: v_log_f32_e32 v6, v6 ; GFX900-GISEL-NEXT: v_mov_b32_e32 v7, 0x41b17218 ; GFX900-GISEL-NEXT: v_cndmask_b32_e32 v8, 0, v7, vcc ; GFX900-GISEL-NEXT: v_cmp_lt_f32_e32 vcc, s10, v1 -; GFX900-GISEL-NEXT: v_cndmask_b32_e32 v1, 1.0, v2, vcc ; GFX900-GISEL-NEXT: v_sub_f32_e32 v0, v0, v8 ; GFX900-GISEL-NEXT: v_mul_f32_e32 v8, 0x3f317217, v6 -; GFX900-GISEL-NEXT: v_mul_f32_e32 v1, s10, v1 -; GFX900-GISEL-NEXT: v_fma_f32 v9, v6, v3, -v8 -; GFX900-GISEL-NEXT: v_log_f32_e32 v2, v1 +; GFX900-GISEL-NEXT: v_cndmask_b32_e64 v1, 0, 1, vcc +; GFX900-GISEL-NEXT: v_fma_f32 v9, v6, v2, -v8 +; GFX900-GISEL-NEXT: v_lshlrev_b32_e32 v1, 5, v1 ; GFX900-GISEL-NEXT: v_fma_f32 v9, v6, v4, v9 +; GFX900-GISEL-NEXT: v_ldexp_f32 v1, s10, v1 ; GFX900-GISEL-NEXT: v_add_f32_e32 v8, v8, v9 +; GFX900-GISEL-NEXT: v_log_f32_e32 v9, v1 ; GFX900-GISEL-NEXT: v_cmp_lt_f32_e64 s[2:3], |v6|, v5 ; 
GFX900-GISEL-NEXT: v_cndmask_b32_e64 v1, v6, v8, s[2:3] ; GFX900-GISEL-NEXT: v_cndmask_b32_e64 v6, 0, v7, s[0:1] ; GFX900-GISEL-NEXT: v_sub_f32_e32 v1, v1, v6 -; GFX900-GISEL-NEXT: v_mul_f32_e32 v6, 0x3f317217, v2 -; GFX900-GISEL-NEXT: v_fma_f32 v3, v2, v3, -v6 -; GFX900-GISEL-NEXT: v_fma_f32 v3, v2, v4, v3 -; GFX900-GISEL-NEXT: v_add_f32_e32 v3, v6, v3 -; GFX900-GISEL-NEXT: v_cmp_lt_f32_e64 s[0:1], |v2|, v5 -; GFX900-GISEL-NEXT: v_cndmask_b32_e64 v2, v2, v3, s[0:1] -; GFX900-GISEL-NEXT: v_cndmask_b32_e32 v3, 0, v7, vcc -; GFX900-GISEL-NEXT: v_sub_f32_e32 v2, v2, v3 -; GFX900-GISEL-NEXT: v_mov_b32_e32 v3, 0 +; GFX900-GISEL-NEXT: v_mul_f32_e32 v6, 0x3f317217, v9 +; GFX900-GISEL-NEXT: v_fma_f32 v2, v9, v2, -v6 +; GFX900-GISEL-NEXT: v_fma_f32 v2, v9, v4, v2 +; GFX900-GISEL-NEXT: v_add_f32_e32 v2, v6, v2 +; GFX900-GISEL-NEXT: v_cmp_lt_f32_e64 s[0:1], |v9|, v5 +; GFX900-GISEL-NEXT: v_cndmask_b32_e64 v2, v9, v2, s[0:1] +; GFX900-GISEL-NEXT: v_cndmask_b32_e32 v4, 0, v7, vcc +; GFX900-GISEL-NEXT: v_sub_f32_e32 v2, v2, v4 ; GFX900-GISEL-NEXT: global_store_dwordx3 v3, v[0:2], s[6:7] ; GFX900-GISEL-NEXT: s_endpgm ; @@ -1156,49 +1173,55 @@ define amdgpu_kernel void @s_log_v3f32(ptr addrspace(1) %out, <3 x float> %in) { ; GFX1100-GISEL: ; %bb.0: ; GFX1100-GISEL-NEXT: s_load_b128 s[0:3], s[4:5], 0x34 ; GFX1100-GISEL-NEXT: s_waitcnt lgkmcnt(0) +; GFX1100-GISEL-NEXT: v_cmp_gt_f32_e64 s7, 0x800000, s2 ; GFX1100-GISEL-NEXT: v_cmp_gt_f32_e64 s3, 0x800000, s0 ; GFX1100-GISEL-NEXT: v_cmp_gt_f32_e64 s6, 0x800000, s1 -; GFX1100-GISEL-NEXT: v_cmp_gt_f32_e64 s7, 0x800000, s2 ; GFX1100-GISEL-NEXT: s_delay_alu instid0(VALU_DEP_3) | instskip(NEXT) | instid1(VALU_DEP_3) -; GFX1100-GISEL-NEXT: v_cndmask_b32_e64 v0, 1.0, 0x4f800000, s3 -; GFX1100-GISEL-NEXT: v_cndmask_b32_e64 v1, 1.0, 0x4f800000, s6 -; GFX1100-GISEL-NEXT: s_delay_alu instid0(VALU_DEP_3) | instskip(SKIP_2) | instid1(VALU_DEP_4) -; GFX1100-GISEL-NEXT: v_cndmask_b32_e64 v2, 1.0, 0x4f800000, s7 -; GFX1100-GISEL-NEXT: 
v_cndmask_b32_e64 v10, 0, 0x41b17218, s6 +; GFX1100-GISEL-NEXT: v_cndmask_b32_e64 v2, 0, 1, s7 +; GFX1100-GISEL-NEXT: v_cndmask_b32_e64 v0, 0, 1, s3 +; GFX1100-GISEL-NEXT: s_delay_alu instid0(VALU_DEP_3) | instskip(SKIP_3) | instid1(VALU_DEP_1) +; GFX1100-GISEL-NEXT: v_cndmask_b32_e64 v1, 0, 1, s6 ; GFX1100-GISEL-NEXT: v_cndmask_b32_e64 v9, 0, 0x41b17218, s3 -; GFX1100-GISEL-NEXT: v_dual_mul_f32 v0, s0, v0 :: v_dual_mul_f32 v1, s1, v1 -; GFX1100-GISEL-NEXT: s_load_b64 s[0:1], s[4:5], 0x24 -; GFX1100-GISEL-NEXT: s_delay_alu instid0(VALU_DEP_1) | instskip(SKIP_1) | instid1(VALU_DEP_2) +; GFX1100-GISEL-NEXT: v_cndmask_b32_e64 v10, 0, 0x41b17218, s6 +; GFX1100-GISEL-NEXT: v_lshlrev_b32_e32 v2, 5, v2 +; GFX1100-GISEL-NEXT: v_ldexp_f32 v2, s2, v2 +; GFX1100-GISEL-NEXT: s_delay_alu instid0(VALU_DEP_1) | instskip(SKIP_2) | instid1(VALU_DEP_1) +; GFX1100-GISEL-NEXT: v_log_f32_e32 v2, v2 +; GFX1100-GISEL-NEXT: s_waitcnt_depctr 0xfff +; GFX1100-GISEL-NEXT: v_dual_mul_f32 v5, 0x3f317217, v2 :: v_dual_lshlrev_b32 v0, 5, v0 +; GFX1100-GISEL-NEXT: v_ldexp_f32 v0, s0, v0 +; GFX1100-GISEL-NEXT: s_delay_alu instid0(VALU_DEP_2) | instskip(NEXT) | instid1(VALU_DEP_2) +; GFX1100-GISEL-NEXT: v_fma_f32 v8, 0x3f317217, v2, -v5 ; GFX1100-GISEL-NEXT: v_log_f32_e32 v0, v0 -; GFX1100-GISEL-NEXT: v_mul_f32_e32 v2, s2, v2 -; GFX1100-GISEL-NEXT: v_log_f32_e32 v1, v1 +; GFX1100-GISEL-NEXT: s_delay_alu instid0(VALU_DEP_1) | instskip(NEXT) | instid1(VALU_DEP_1) +; GFX1100-GISEL-NEXT: v_fmac_f32_e32 v8, 0x3377d1cf, v2 +; GFX1100-GISEL-NEXT: v_add_f32_e32 v5, v5, v8 ; GFX1100-GISEL-NEXT: s_waitcnt_depctr 0xfff ; GFX1100-GISEL-NEXT: v_mul_f32_e32 v3, 0x3f317217, v0 -; GFX1100-GISEL-NEXT: v_log_f32_e32 v2, v2 -; GFX1100-GISEL-NEXT: v_mul_f32_e32 v4, 0x3f317217, v1 ; GFX1100-GISEL-NEXT: v_cmp_gt_f32_e64 vcc_lo, 0x7f800000, |v0| -; GFX1100-GISEL-NEXT: s_delay_alu instid0(VALU_DEP_3) | instskip(SKIP_4) | instid1(VALU_DEP_3) +; GFX1100-GISEL-NEXT: s_delay_alu instid0(VALU_DEP_2) | instskip(NEXT) | 
instid1(VALU_DEP_1) ; GFX1100-GISEL-NEXT: v_fma_f32 v6, 0x3f317217, v0, -v3 +; GFX1100-GISEL-NEXT: v_dual_fmac_f32 v6, 0x3377d1cf, v0 :: v_dual_lshlrev_b32 v1, 5, v1 +; GFX1100-GISEL-NEXT: s_delay_alu instid0(VALU_DEP_1) | instskip(SKIP_1) | instid1(VALU_DEP_2) +; GFX1100-GISEL-NEXT: v_ldexp_f32 v1, s1, v1 +; GFX1100-GISEL-NEXT: s_load_b64 s[0:1], s[4:5], 0x24 +; GFX1100-GISEL-NEXT: v_add_f32_e32 v3, v3, v6 +; GFX1100-GISEL-NEXT: s_delay_alu instid0(VALU_DEP_2) | instskip(SKIP_3) | instid1(VALU_DEP_1) +; GFX1100-GISEL-NEXT: v_log_f32_e32 v1, v1 +; GFX1100-GISEL-NEXT: v_cndmask_b32_e64 v6, 0, 0x41b17218, s7 ; GFX1100-GISEL-NEXT: s_waitcnt_depctr 0xfff -; GFX1100-GISEL-NEXT: v_mul_f32_e32 v5, 0x3f317217, v2 +; GFX1100-GISEL-NEXT: v_mul_f32_e32 v4, 0x3f317217, v1 ; GFX1100-GISEL-NEXT: v_fma_f32 v7, 0x3f317217, v1, -v4 -; GFX1100-GISEL-NEXT: v_fmac_f32_e32 v6, 0x3377d1cf, v0 -; GFX1100-GISEL-NEXT: v_fma_f32 v8, 0x3f317217, v2, -v5 -; GFX1100-GISEL-NEXT: s_delay_alu instid0(VALU_DEP_3) | instskip(NEXT) | instid1(VALU_DEP_3) +; GFX1100-GISEL-NEXT: s_delay_alu instid0(VALU_DEP_1) | instskip(NEXT) | instid1(VALU_DEP_1) ; GFX1100-GISEL-NEXT: v_fmac_f32_e32 v7, 0x3377d1cf, v1 -; GFX1100-GISEL-NEXT: v_add_f32_e32 v3, v3, v6 -; GFX1100-GISEL-NEXT: v_cndmask_b32_e64 v6, 0, 0x41b17218, s7 -; GFX1100-GISEL-NEXT: s_delay_alu instid0(VALU_DEP_3) | instskip(NEXT) | instid1(VALU_DEP_3) ; GFX1100-GISEL-NEXT: v_add_f32_e32 v4, v4, v7 ; GFX1100-GISEL-NEXT: v_cndmask_b32_e32 v0, v0, v3, vcc_lo ; GFX1100-GISEL-NEXT: v_cmp_gt_f32_e64 vcc_lo, 0x7f800000, |v1| -; GFX1100-GISEL-NEXT: v_dual_fmac_f32 v8, 0x3377d1cf, v2 :: v_dual_mov_b32 v3, 0 +; GFX1100-GISEL-NEXT: v_mov_b32_e32 v3, 0 ; GFX1100-GISEL-NEXT: s_delay_alu instid0(VALU_DEP_4) | instskip(SKIP_1) | instid1(VALU_DEP_2) ; GFX1100-GISEL-NEXT: v_cndmask_b32_e32 v1, v1, v4, vcc_lo ; GFX1100-GISEL-NEXT: v_cmp_gt_f32_e64 vcc_lo, 0x7f800000, |v2| -; GFX1100-GISEL-NEXT: v_sub_f32_e32 v1, v1, v10 -; GFX1100-GISEL-NEXT: s_delay_alu 
instid0(VALU_DEP_4) | instskip(NEXT) | instid1(VALU_DEP_1) -; GFX1100-GISEL-NEXT: v_dual_add_f32 v5, v5, v8 :: v_dual_sub_f32 v0, v0, v9 +; GFX1100-GISEL-NEXT: v_dual_sub_f32 v0, v0, v9 :: v_dual_sub_f32 v1, v1, v10 ; GFX1100-GISEL-NEXT: v_cndmask_b32_e32 v2, v2, v5, vcc_lo ; GFX1100-GISEL-NEXT: s_delay_alu instid0(VALU_DEP_1) ; GFX1100-GISEL-NEXT: v_sub_f32_e32 v2, v2, v6 @@ -1433,62 +1456,65 @@ define amdgpu_kernel void @s_log_v4f32(ptr addrspace(1) %out, <4 x float> %in) { ; SI-GISEL-NEXT: s_load_dwordx4 s[8:11], s[4:5], 0xd ; SI-GISEL-NEXT: s_load_dwordx2 s[4:5], s[4:5], 0x9 ; SI-GISEL-NEXT: v_mov_b32_e32 v2, 0x800000 -; SI-GISEL-NEXT: v_mov_b32_e32 v3, 0x4f800000 -; SI-GISEL-NEXT: v_mov_b32_e32 v4, 0x3f317217 +; SI-GISEL-NEXT: v_mov_b32_e32 v3, 0x3f317217 +; SI-GISEL-NEXT: v_mov_b32_e32 v4, 0x3377d1cf ; SI-GISEL-NEXT: s_waitcnt lgkmcnt(0) ; SI-GISEL-NEXT: v_cmp_lt_f32_e32 vcc, s8, v2 -; SI-GISEL-NEXT: v_cndmask_b32_e32 v0, 1.0, v3, vcc -; SI-GISEL-NEXT: v_mul_f32_e32 v0, s8, v0 +; SI-GISEL-NEXT: v_cndmask_b32_e64 v0, 0, 1, vcc +; SI-GISEL-NEXT: v_lshlrev_b32_e32 v0, 5, v0 +; SI-GISEL-NEXT: v_ldexp_f32_e32 v0, s8, v0 ; SI-GISEL-NEXT: v_log_f32_e32 v0, v0 -; SI-GISEL-NEXT: v_mov_b32_e32 v5, 0x3377d1cf -; SI-GISEL-NEXT: v_mov_b32_e32 v6, 0x7f800000 +; SI-GISEL-NEXT: v_mov_b32_e32 v5, 0x7f800000 ; SI-GISEL-NEXT: s_mov_b32 s6, -1 +; SI-GISEL-NEXT: s_mov_b32 s7, 0xf000 ; SI-GISEL-NEXT: v_mul_f32_e32 v1, 0x3f317217, v0 -; SI-GISEL-NEXT: v_fma_f32 v7, v0, v4, -v1 -; SI-GISEL-NEXT: v_fma_f32 v7, v0, v5, v7 -; SI-GISEL-NEXT: v_add_f32_e32 v1, v1, v7 -; SI-GISEL-NEXT: v_cmp_lt_f32_e64 s[0:1], |v0|, v6 +; SI-GISEL-NEXT: v_fma_f32 v6, v0, v3, -v1 +; SI-GISEL-NEXT: v_fma_f32 v6, v0, v4, v6 +; SI-GISEL-NEXT: v_add_f32_e32 v1, v1, v6 +; SI-GISEL-NEXT: v_cmp_lt_f32_e64 s[0:1], |v0|, v5 ; SI-GISEL-NEXT: v_cndmask_b32_e64 v0, v0, v1, s[0:1] ; SI-GISEL-NEXT: v_cmp_lt_f32_e64 s[0:1], s9, v2 -; SI-GISEL-NEXT: v_cndmask_b32_e64 v1, 1.0, v3, s[0:1] -; SI-GISEL-NEXT: v_mul_f32_e32 v1, 
s9, v1 +; SI-GISEL-NEXT: v_cndmask_b32_e64 v1, 0, 1, s[0:1] +; SI-GISEL-NEXT: v_lshlrev_b32_e32 v1, 5, v1 +; SI-GISEL-NEXT: v_ldexp_f32_e32 v1, s9, v1 ; SI-GISEL-NEXT: v_log_f32_e32 v1, v1 -; SI-GISEL-NEXT: v_mov_b32_e32 v7, 0x41b17218 -; SI-GISEL-NEXT: v_cndmask_b32_e32 v8, 0, v7, vcc -; SI-GISEL-NEXT: v_sub_f32_e32 v0, v0, v8 -; SI-GISEL-NEXT: v_mul_f32_e32 v8, 0x3f317217, v1 -; SI-GISEL-NEXT: v_fma_f32 v9, v1, v4, -v8 -; SI-GISEL-NEXT: v_fma_f32 v9, v1, v5, v9 +; SI-GISEL-NEXT: v_mov_b32_e32 v6, 0x41b17218 +; SI-GISEL-NEXT: v_cndmask_b32_e32 v7, 0, v6, vcc +; SI-GISEL-NEXT: v_sub_f32_e32 v0, v0, v7 +; SI-GISEL-NEXT: v_mul_f32_e32 v7, 0x3f317217, v1 +; SI-GISEL-NEXT: v_fma_f32 v8, v1, v3, -v7 +; SI-GISEL-NEXT: v_fma_f32 v8, v1, v4, v8 ; SI-GISEL-NEXT: v_cmp_lt_f32_e32 vcc, s10, v2 -; SI-GISEL-NEXT: v_add_f32_e32 v8, v8, v9 -; SI-GISEL-NEXT: v_cndmask_b32_e32 v9, 1.0, v3, vcc -; SI-GISEL-NEXT: v_mul_f32_e32 v9, s10, v9 -; SI-GISEL-NEXT: v_log_f32_e32 v9, v9 -; SI-GISEL-NEXT: v_cmp_lt_f32_e64 s[2:3], |v1|, v6 -; SI-GISEL-NEXT: v_cndmask_b32_e64 v1, v1, v8, s[2:3] -; SI-GISEL-NEXT: v_cndmask_b32_e64 v8, 0, v7, s[0:1] +; SI-GISEL-NEXT: v_add_f32_e32 v7, v7, v8 +; SI-GISEL-NEXT: v_cndmask_b32_e64 v8, 0, 1, vcc +; SI-GISEL-NEXT: v_lshlrev_b32_e32 v8, 5, v8 +; SI-GISEL-NEXT: v_ldexp_f32_e32 v8, s10, v8 +; SI-GISEL-NEXT: v_log_f32_e32 v8, v8 +; SI-GISEL-NEXT: v_cmp_lt_f32_e64 s[2:3], |v1|, v5 +; SI-GISEL-NEXT: v_cndmask_b32_e64 v1, v1, v7, s[2:3] +; SI-GISEL-NEXT: v_cndmask_b32_e64 v7, 0, v6, s[0:1] ; SI-GISEL-NEXT: v_cmp_lt_f32_e64 s[0:1], s11, v2 -; SI-GISEL-NEXT: v_cndmask_b32_e64 v2, 1.0, v3, s[0:1] -; SI-GISEL-NEXT: v_sub_f32_e32 v1, v1, v8 -; SI-GISEL-NEXT: v_mul_f32_e32 v8, 0x3f317217, v9 -; SI-GISEL-NEXT: v_mul_f32_e32 v2, s11, v2 -; SI-GISEL-NEXT: v_fma_f32 v10, v9, v4, -v8 -; SI-GISEL-NEXT: v_log_f32_e32 v3, v2 -; SI-GISEL-NEXT: v_fma_f32 v10, v9, v5, v10 -; SI-GISEL-NEXT: v_add_f32_e32 v8, v8, v10 -; SI-GISEL-NEXT: v_cmp_lt_f32_e64 s[2:3], |v9|, v6 -; 
SI-GISEL-NEXT: v_cndmask_b32_e64 v2, v9, v8, s[2:3] -; SI-GISEL-NEXT: v_cndmask_b32_e32 v8, 0, v7, vcc -; SI-GISEL-NEXT: v_sub_f32_e32 v2, v2, v8 -; SI-GISEL-NEXT: v_mul_f32_e32 v8, 0x3f317217, v3 -; SI-GISEL-NEXT: v_fma_f32 v4, v3, v4, -v8 -; SI-GISEL-NEXT: v_fma_f32 v4, v3, v5, v4 -; SI-GISEL-NEXT: v_add_f32_e32 v4, v8, v4 -; SI-GISEL-NEXT: v_cmp_lt_f32_e64 vcc, |v3|, v6 -; SI-GISEL-NEXT: v_cndmask_b32_e32 v3, v3, v4, vcc -; SI-GISEL-NEXT: v_cndmask_b32_e64 v4, 0, v7, s[0:1] +; SI-GISEL-NEXT: v_sub_f32_e32 v1, v1, v7 +; SI-GISEL-NEXT: v_mul_f32_e32 v7, 0x3f317217, v8 +; SI-GISEL-NEXT: v_cndmask_b32_e64 v2, 0, 1, s[0:1] +; SI-GISEL-NEXT: v_fma_f32 v9, v8, v3, -v7 +; SI-GISEL-NEXT: v_lshlrev_b32_e32 v2, 5, v2 +; SI-GISEL-NEXT: v_fma_f32 v9, v8, v4, v9 +; SI-GISEL-NEXT: v_ldexp_f32_e32 v2, s11, v2 +; SI-GISEL-NEXT: v_add_f32_e32 v7, v7, v9 +; SI-GISEL-NEXT: v_log_f32_e32 v9, v2 +; SI-GISEL-NEXT: v_cmp_lt_f32_e64 s[2:3], |v8|, v5 +; SI-GISEL-NEXT: v_cndmask_b32_e64 v2, v8, v7, s[2:3] +; SI-GISEL-NEXT: v_cndmask_b32_e32 v7, 0, v6, vcc +; SI-GISEL-NEXT: v_sub_f32_e32 v2, v2, v7 +; SI-GISEL-NEXT: v_mul_f32_e32 v7, 0x3f317217, v9 +; SI-GISEL-NEXT: v_fma_f32 v3, v9, v3, -v7 +; SI-GISEL-NEXT: v_fma_f32 v3, v9, v4, v3 +; SI-GISEL-NEXT: v_add_f32_e32 v3, v7, v3 +; SI-GISEL-NEXT: v_cmp_lt_f32_e64 vcc, |v9|, v5 +; SI-GISEL-NEXT: v_cndmask_b32_e32 v3, v9, v3, vcc +; SI-GISEL-NEXT: v_cndmask_b32_e64 v4, 0, v6, s[0:1] ; SI-GISEL-NEXT: v_sub_f32_e32 v3, v3, v4 -; SI-GISEL-NEXT: s_mov_b32 s7, 0xf000 ; SI-GISEL-NEXT: buffer_store_dwordx4 v[0:3], off, s[4:7], 0 ; SI-GISEL-NEXT: s_endpgm ; @@ -1581,12 +1607,13 @@ define amdgpu_kernel void @s_log_v4f32(ptr addrspace(1) %out, <4 x float> %in) { ; VI-GISEL: ; %bb.0: ; VI-GISEL-NEXT: s_load_dwordx4 s[8:11], s[4:5], 0x34 ; VI-GISEL-NEXT: v_mov_b32_e32 v2, 0x800000 -; VI-GISEL-NEXT: v_mov_b32_e32 v3, 0x4f800000 +; VI-GISEL-NEXT: v_mov_b32_e32 v3, 0x7f800000 ; VI-GISEL-NEXT: s_load_dwordx2 s[4:5], s[4:5], 0x24 ; VI-GISEL-NEXT: s_waitcnt 
lgkmcnt(0) ; VI-GISEL-NEXT: v_cmp_lt_f32_e32 vcc, s8, v2 -; VI-GISEL-NEXT: v_cndmask_b32_e32 v0, 1.0, v3, vcc -; VI-GISEL-NEXT: v_mul_f32_e32 v0, s8, v0 +; VI-GISEL-NEXT: v_cndmask_b32_e64 v0, 0, 1, vcc +; VI-GISEL-NEXT: v_lshlrev_b32_e32 v0, 5, v0 +; VI-GISEL-NEXT: v_ldexp_f32 v0, s8, v0 ; VI-GISEL-NEXT: v_log_f32_e32 v0, v0 ; VI-GISEL-NEXT: v_and_b32_e32 v1, 0xfffff000, v0 ; VI-GISEL-NEXT: v_sub_f32_e32 v4, v0, v1 @@ -1597,62 +1624,64 @@ define amdgpu_kernel void @s_log_v4f32(ptr addrspace(1) %out, <4 x float> %in) { ; VI-GISEL-NEXT: v_mul_f32_e32 v1, 0x3f317000, v1 ; VI-GISEL-NEXT: v_add_f32_e32 v4, v4, v5 ; VI-GISEL-NEXT: v_add_f32_e32 v1, v1, v4 -; VI-GISEL-NEXT: v_mov_b32_e32 v4, 0x7f800000 -; VI-GISEL-NEXT: v_cmp_lt_f32_e64 s[0:1], |v0|, v4 +; VI-GISEL-NEXT: v_cmp_lt_f32_e64 s[0:1], |v0|, v3 ; VI-GISEL-NEXT: v_cndmask_b32_e64 v0, v0, v1, s[0:1] ; VI-GISEL-NEXT: v_cmp_lt_f32_e64 s[0:1], s9, v2 -; VI-GISEL-NEXT: v_cndmask_b32_e64 v1, 1.0, v3, s[0:1] -; VI-GISEL-NEXT: v_mul_f32_e32 v1, s9, v1 +; VI-GISEL-NEXT: v_cndmask_b32_e64 v1, 0, 1, s[0:1] +; VI-GISEL-NEXT: v_lshlrev_b32_e32 v1, 5, v1 +; VI-GISEL-NEXT: v_ldexp_f32 v1, s9, v1 ; VI-GISEL-NEXT: v_log_f32_e32 v1, v1 -; VI-GISEL-NEXT: v_mov_b32_e32 v5, 0x41b17218 -; VI-GISEL-NEXT: v_cndmask_b32_e32 v6, 0, v5, vcc -; VI-GISEL-NEXT: v_sub_f32_e32 v0, v0, v6 -; VI-GISEL-NEXT: v_and_b32_e32 v6, 0xfffff000, v1 -; VI-GISEL-NEXT: v_sub_f32_e32 v7, v1, v6 -; VI-GISEL-NEXT: v_mul_f32_e32 v8, 0x3805fdf4, v7 -; VI-GISEL-NEXT: v_mul_f32_e32 v9, 0x3805fdf4, v6 -; VI-GISEL-NEXT: v_add_f32_e32 v8, v9, v8 -; VI-GISEL-NEXT: v_mul_f32_e32 v7, 0x3f317000, v7 -; VI-GISEL-NEXT: v_add_f32_e32 v7, v7, v8 +; VI-GISEL-NEXT: v_mov_b32_e32 v4, 0x41b17218 +; VI-GISEL-NEXT: v_cndmask_b32_e32 v5, 0, v4, vcc +; VI-GISEL-NEXT: v_sub_f32_e32 v0, v0, v5 +; VI-GISEL-NEXT: v_and_b32_e32 v5, 0xfffff000, v1 +; VI-GISEL-NEXT: v_sub_f32_e32 v6, v1, v5 +; VI-GISEL-NEXT: v_mul_f32_e32 v7, 0x3805fdf4, v6 +; VI-GISEL-NEXT: v_mul_f32_e32 v8, 0x3805fdf4, 
v5 +; VI-GISEL-NEXT: v_add_f32_e32 v7, v8, v7 ; VI-GISEL-NEXT: v_mul_f32_e32 v6, 0x3f317000, v6 -; VI-GISEL-NEXT: v_cmp_lt_f32_e32 vcc, s10, v2 ; VI-GISEL-NEXT: v_add_f32_e32 v6, v6, v7 -; VI-GISEL-NEXT: v_cndmask_b32_e32 v7, 1.0, v3, vcc -; VI-GISEL-NEXT: v_mul_f32_e32 v7, s10, v7 -; VI-GISEL-NEXT: v_log_f32_e32 v7, v7 -; VI-GISEL-NEXT: v_cmp_lt_f32_e64 s[2:3], |v1|, v4 -; VI-GISEL-NEXT: v_cndmask_b32_e64 v1, v1, v6, s[2:3] -; VI-GISEL-NEXT: v_cndmask_b32_e64 v6, 0, v5, s[0:1] -; VI-GISEL-NEXT: v_sub_f32_e32 v1, v1, v6 -; VI-GISEL-NEXT: v_and_b32_e32 v6, 0xfffff000, v7 +; VI-GISEL-NEXT: v_mul_f32_e32 v5, 0x3f317000, v5 +; VI-GISEL-NEXT: v_cmp_lt_f32_e32 vcc, s10, v2 +; VI-GISEL-NEXT: v_add_f32_e32 v5, v5, v6 +; VI-GISEL-NEXT: v_cndmask_b32_e64 v6, 0, 1, vcc +; VI-GISEL-NEXT: v_lshlrev_b32_e32 v6, 5, v6 +; VI-GISEL-NEXT: v_ldexp_f32 v6, s10, v6 +; VI-GISEL-NEXT: v_log_f32_e32 v6, v6 +; VI-GISEL-NEXT: v_cmp_lt_f32_e64 s[2:3], |v1|, v3 +; VI-GISEL-NEXT: v_cndmask_b32_e64 v1, v1, v5, s[2:3] +; VI-GISEL-NEXT: v_cndmask_b32_e64 v5, 0, v4, s[0:1] +; VI-GISEL-NEXT: v_sub_f32_e32 v1, v1, v5 +; VI-GISEL-NEXT: v_and_b32_e32 v5, 0xfffff000, v6 +; VI-GISEL-NEXT: v_sub_f32_e32 v7, v6, v5 ; VI-GISEL-NEXT: v_cmp_lt_f32_e64 s[0:1], s11, v2 -; VI-GISEL-NEXT: v_sub_f32_e32 v8, v7, v6 -; VI-GISEL-NEXT: v_cndmask_b32_e64 v2, 1.0, v3, s[0:1] -; VI-GISEL-NEXT: v_mul_f32_e32 v9, 0x3805fdf4, v8 -; VI-GISEL-NEXT: v_mul_f32_e32 v10, 0x3805fdf4, v6 -; VI-GISEL-NEXT: v_mul_f32_e32 v2, s11, v2 -; VI-GISEL-NEXT: v_add_f32_e32 v9, v10, v9 -; VI-GISEL-NEXT: v_mul_f32_e32 v8, 0x3f317000, v8 -; VI-GISEL-NEXT: v_log_f32_e32 v3, v2 -; VI-GISEL-NEXT: v_add_f32_e32 v8, v8, v9 -; VI-GISEL-NEXT: v_mul_f32_e32 v6, 0x3f317000, v6 -; VI-GISEL-NEXT: v_add_f32_e32 v6, v6, v8 -; VI-GISEL-NEXT: v_cmp_lt_f32_e64 s[2:3], |v7|, v4 -; VI-GISEL-NEXT: v_cndmask_b32_e64 v2, v7, v6, s[2:3] -; VI-GISEL-NEXT: v_cndmask_b32_e32 v6, 0, v5, vcc -; VI-GISEL-NEXT: v_sub_f32_e32 v2, v2, v6 -; VI-GISEL-NEXT: v_and_b32_e32 v6, 
0xfffff000, v3 -; VI-GISEL-NEXT: v_sub_f32_e32 v7, v3, v6 ; VI-GISEL-NEXT: v_mul_f32_e32 v8, 0x3805fdf4, v7 -; VI-GISEL-NEXT: v_mul_f32_e32 v9, 0x3805fdf4, v6 +; VI-GISEL-NEXT: v_mul_f32_e32 v9, 0x3805fdf4, v5 +; VI-GISEL-NEXT: v_cndmask_b32_e64 v2, 0, 1, s[0:1] ; VI-GISEL-NEXT: v_add_f32_e32 v8, v9, v8 ; VI-GISEL-NEXT: v_mul_f32_e32 v7, 0x3f317000, v7 +; VI-GISEL-NEXT: v_lshlrev_b32_e32 v2, 5, v2 ; VI-GISEL-NEXT: v_add_f32_e32 v7, v7, v8 +; VI-GISEL-NEXT: v_mul_f32_e32 v5, 0x3f317000, v5 +; VI-GISEL-NEXT: v_ldexp_f32 v2, s11, v2 +; VI-GISEL-NEXT: v_add_f32_e32 v5, v5, v7 +; VI-GISEL-NEXT: v_log_f32_e32 v7, v2 +; VI-GISEL-NEXT: v_cmp_lt_f32_e64 s[2:3], |v6|, v3 +; VI-GISEL-NEXT: v_cndmask_b32_e64 v2, v6, v5, s[2:3] +; VI-GISEL-NEXT: v_cndmask_b32_e32 v5, 0, v4, vcc +; VI-GISEL-NEXT: v_sub_f32_e32 v2, v2, v5 +; VI-GISEL-NEXT: v_and_b32_e32 v5, 0xfffff000, v7 +; VI-GISEL-NEXT: v_sub_f32_e32 v6, v7, v5 +; VI-GISEL-NEXT: v_mul_f32_e32 v8, 0x3805fdf4, v6 +; VI-GISEL-NEXT: v_mul_f32_e32 v9, 0x3805fdf4, v5 +; VI-GISEL-NEXT: v_add_f32_e32 v8, v9, v8 ; VI-GISEL-NEXT: v_mul_f32_e32 v6, 0x3f317000, v6 -; VI-GISEL-NEXT: v_add_f32_e32 v6, v6, v7 -; VI-GISEL-NEXT: v_cmp_lt_f32_e64 vcc, |v3|, v4 -; VI-GISEL-NEXT: v_cndmask_b32_e32 v3, v3, v6, vcc -; VI-GISEL-NEXT: v_cndmask_b32_e64 v4, 0, v5, s[0:1] +; VI-GISEL-NEXT: v_add_f32_e32 v6, v6, v8 +; VI-GISEL-NEXT: v_mul_f32_e32 v5, 0x3f317000, v5 +; VI-GISEL-NEXT: v_add_f32_e32 v5, v5, v6 +; VI-GISEL-NEXT: v_cmp_lt_f32_e64 vcc, |v7|, v3 +; VI-GISEL-NEXT: v_cndmask_b32_e32 v3, v7, v5, vcc +; VI-GISEL-NEXT: v_cndmask_b32_e64 v4, 0, v4, s[0:1] ; VI-GISEL-NEXT: v_sub_f32_e32 v3, v3, v4 ; VI-GISEL-NEXT: v_mov_b32_e32 v4, s4 ; VI-GISEL-NEXT: v_mov_b32_e32 v5, s5 @@ -1730,61 +1759,64 @@ define amdgpu_kernel void @s_log_v4f32(ptr addrspace(1) %out, <4 x float> %in) { ; GFX900-GISEL-NEXT: s_load_dwordx4 s[8:11], s[4:5], 0x34 ; GFX900-GISEL-NEXT: s_load_dwordx2 s[6:7], s[4:5], 0x24 ; GFX900-GISEL-NEXT: v_mov_b32_e32 v2, 0x800000 -; 
GFX900-GISEL-NEXT: v_mov_b32_e32 v3, 0x4f800000 -; GFX900-GISEL-NEXT: v_mov_b32_e32 v4, 0x3f317217 +; GFX900-GISEL-NEXT: v_mov_b32_e32 v3, 0x3f317217 +; GFX900-GISEL-NEXT: v_mov_b32_e32 v5, 0x3377d1cf ; GFX900-GISEL-NEXT: s_waitcnt lgkmcnt(0) ; GFX900-GISEL-NEXT: v_cmp_lt_f32_e32 vcc, s8, v2 -; GFX900-GISEL-NEXT: v_cndmask_b32_e32 v0, 1.0, v3, vcc -; GFX900-GISEL-NEXT: v_mul_f32_e32 v0, s8, v0 +; GFX900-GISEL-NEXT: v_cndmask_b32_e64 v0, 0, 1, vcc +; GFX900-GISEL-NEXT: v_lshlrev_b32_e32 v0, 5, v0 +; GFX900-GISEL-NEXT: v_ldexp_f32 v0, s8, v0 ; GFX900-GISEL-NEXT: v_log_f32_e32 v0, v0 -; GFX900-GISEL-NEXT: v_mov_b32_e32 v5, 0x3377d1cf ; GFX900-GISEL-NEXT: v_mov_b32_e32 v6, 0x7f800000 +; GFX900-GISEL-NEXT: v_mov_b32_e32 v4, 0 ; GFX900-GISEL-NEXT: v_mul_f32_e32 v1, 0x3f317217, v0 -; GFX900-GISEL-NEXT: v_fma_f32 v7, v0, v4, -v1 +; GFX900-GISEL-NEXT: v_fma_f32 v7, v0, v3, -v1 ; GFX900-GISEL-NEXT: v_fma_f32 v7, v0, v5, v7 ; GFX900-GISEL-NEXT: v_add_f32_e32 v1, v1, v7 ; GFX900-GISEL-NEXT: v_cmp_lt_f32_e64 s[0:1], |v0|, v6 ; GFX900-GISEL-NEXT: v_cndmask_b32_e64 v0, v0, v1, s[0:1] ; GFX900-GISEL-NEXT: v_cmp_lt_f32_e64 s[0:1], s9, v2 -; GFX900-GISEL-NEXT: v_cndmask_b32_e64 v1, 1.0, v3, s[0:1] -; GFX900-GISEL-NEXT: v_mul_f32_e32 v1, s9, v1 +; GFX900-GISEL-NEXT: v_cndmask_b32_e64 v1, 0, 1, s[0:1] +; GFX900-GISEL-NEXT: v_lshlrev_b32_e32 v1, 5, v1 +; GFX900-GISEL-NEXT: v_ldexp_f32 v1, s9, v1 ; GFX900-GISEL-NEXT: v_log_f32_e32 v1, v1 ; GFX900-GISEL-NEXT: v_mov_b32_e32 v7, 0x41b17218 ; GFX900-GISEL-NEXT: v_cndmask_b32_e32 v8, 0, v7, vcc ; GFX900-GISEL-NEXT: v_sub_f32_e32 v0, v0, v8 ; GFX900-GISEL-NEXT: v_mul_f32_e32 v8, 0x3f317217, v1 -; GFX900-GISEL-NEXT: v_fma_f32 v9, v1, v4, -v8 +; GFX900-GISEL-NEXT: v_fma_f32 v9, v1, v3, -v8 ; GFX900-GISEL-NEXT: v_fma_f32 v9, v1, v5, v9 ; GFX900-GISEL-NEXT: v_cmp_lt_f32_e32 vcc, s10, v2 ; GFX900-GISEL-NEXT: v_add_f32_e32 v8, v8, v9 -; GFX900-GISEL-NEXT: v_cndmask_b32_e32 v9, 1.0, v3, vcc -; GFX900-GISEL-NEXT: v_mul_f32_e32 v9, s10, v9 +; 
GFX900-GISEL-NEXT: v_cndmask_b32_e64 v9, 0, 1, vcc +; GFX900-GISEL-NEXT: v_lshlrev_b32_e32 v9, 5, v9 +; GFX900-GISEL-NEXT: v_ldexp_f32 v9, s10, v9 ; GFX900-GISEL-NEXT: v_log_f32_e32 v9, v9 ; GFX900-GISEL-NEXT: v_cmp_lt_f32_e64 s[2:3], |v1|, v6 ; GFX900-GISEL-NEXT: v_cndmask_b32_e64 v1, v1, v8, s[2:3] ; GFX900-GISEL-NEXT: v_cndmask_b32_e64 v8, 0, v7, s[0:1] ; GFX900-GISEL-NEXT: v_cmp_lt_f32_e64 s[0:1], s11, v2 -; GFX900-GISEL-NEXT: v_cndmask_b32_e64 v2, 1.0, v3, s[0:1] ; GFX900-GISEL-NEXT: v_sub_f32_e32 v1, v1, v8 ; GFX900-GISEL-NEXT: v_mul_f32_e32 v8, 0x3f317217, v9 -; GFX900-GISEL-NEXT: v_mul_f32_e32 v2, s11, v2 -; GFX900-GISEL-NEXT: v_fma_f32 v10, v9, v4, -v8 -; GFX900-GISEL-NEXT: v_log_f32_e32 v3, v2 +; GFX900-GISEL-NEXT: v_cndmask_b32_e64 v2, 0, 1, s[0:1] +; GFX900-GISEL-NEXT: v_fma_f32 v10, v9, v3, -v8 +; GFX900-GISEL-NEXT: v_lshlrev_b32_e32 v2, 5, v2 ; GFX900-GISEL-NEXT: v_fma_f32 v10, v9, v5, v10 +; GFX900-GISEL-NEXT: v_ldexp_f32 v2, s11, v2 ; GFX900-GISEL-NEXT: v_add_f32_e32 v8, v8, v10 +; GFX900-GISEL-NEXT: v_log_f32_e32 v10, v2 ; GFX900-GISEL-NEXT: v_cmp_lt_f32_e64 s[2:3], |v9|, v6 ; GFX900-GISEL-NEXT: v_cndmask_b32_e64 v2, v9, v8, s[2:3] ; GFX900-GISEL-NEXT: v_cndmask_b32_e32 v8, 0, v7, vcc ; GFX900-GISEL-NEXT: v_sub_f32_e32 v2, v2, v8 -; GFX900-GISEL-NEXT: v_mul_f32_e32 v8, 0x3f317217, v3 -; GFX900-GISEL-NEXT: v_fma_f32 v4, v3, v4, -v8 -; GFX900-GISEL-NEXT: v_fma_f32 v4, v3, v5, v4 -; GFX900-GISEL-NEXT: v_add_f32_e32 v4, v8, v4 -; GFX900-GISEL-NEXT: v_cmp_lt_f32_e64 vcc, |v3|, v6 -; GFX900-GISEL-NEXT: v_cndmask_b32_e32 v3, v3, v4, vcc -; GFX900-GISEL-NEXT: v_cndmask_b32_e64 v4, 0, v7, s[0:1] -; GFX900-GISEL-NEXT: v_sub_f32_e32 v3, v3, v4 -; GFX900-GISEL-NEXT: v_mov_b32_e32 v4, 0 +; GFX900-GISEL-NEXT: v_mul_f32_e32 v8, 0x3f317217, v10 +; GFX900-GISEL-NEXT: v_fma_f32 v3, v10, v3, -v8 +; GFX900-GISEL-NEXT: v_fma_f32 v3, v10, v5, v3 +; GFX900-GISEL-NEXT: v_add_f32_e32 v3, v8, v3 +; GFX900-GISEL-NEXT: v_cmp_lt_f32_e64 vcc, |v10|, v6 +; GFX900-GISEL-NEXT: 
v_cndmask_b32_e32 v3, v10, v3, vcc +; GFX900-GISEL-NEXT: v_cndmask_b32_e64 v5, 0, v7, s[0:1] +; GFX900-GISEL-NEXT: v_sub_f32_e32 v3, v3, v5 ; GFX900-GISEL-NEXT: global_store_dwordx4 v4, v[0:3], s[6:7] ; GFX900-GISEL-NEXT: s_endpgm ; @@ -1860,60 +1892,67 @@ define amdgpu_kernel void @s_log_v4f32(ptr addrspace(1) %out, <4 x float> %in) { ; GFX1100-GISEL: ; %bb.0: ; GFX1100-GISEL-NEXT: s_load_b128 s[0:3], s[4:5], 0x34 ; GFX1100-GISEL-NEXT: s_waitcnt lgkmcnt(0) -; GFX1100-GISEL-NEXT: v_cmp_gt_f32_e64 s6, 0x800000, s0 -; GFX1100-GISEL-NEXT: v_cmp_gt_f32_e64 s7, 0x800000, s1 ; GFX1100-GISEL-NEXT: v_cmp_gt_f32_e64 s8, 0x800000, s2 ; GFX1100-GISEL-NEXT: v_cmp_gt_f32_e64 s9, 0x800000, s3 +; GFX1100-GISEL-NEXT: v_cmp_gt_f32_e64 s6, 0x800000, s0 +; GFX1100-GISEL-NEXT: v_cmp_gt_f32_e64 s7, 0x800000, s1 ; GFX1100-GISEL-NEXT: s_delay_alu instid0(VALU_DEP_4) | instskip(NEXT) | instid1(VALU_DEP_4) -; GFX1100-GISEL-NEXT: v_cndmask_b32_e64 v0, 1.0, 0x4f800000, s6 -; GFX1100-GISEL-NEXT: v_cndmask_b32_e64 v1, 1.0, 0x4f800000, s7 +; GFX1100-GISEL-NEXT: v_cndmask_b32_e64 v2, 0, 1, s8 +; GFX1100-GISEL-NEXT: v_cndmask_b32_e64 v3, 0, 1, s9 ; GFX1100-GISEL-NEXT: s_delay_alu instid0(VALU_DEP_4) | instskip(NEXT) | instid1(VALU_DEP_4) -; GFX1100-GISEL-NEXT: v_cndmask_b32_e64 v2, 1.0, 0x4f800000, s8 -; GFX1100-GISEL-NEXT: v_cndmask_b32_e64 v3, 1.0, 0x4f800000, s9 +; GFX1100-GISEL-NEXT: v_cndmask_b32_e64 v0, 0, 1, s6 +; GFX1100-GISEL-NEXT: v_cndmask_b32_e64 v1, 0, 1, s7 ; GFX1100-GISEL-NEXT: v_cndmask_b32_e64 v4, 0, 0x41b17218, s6 -; GFX1100-GISEL-NEXT: s_delay_alu instid0(VALU_DEP_4) | instskip(NEXT) | instid1(VALU_DEP_3) -; GFX1100-GISEL-NEXT: v_dual_mul_f32 v0, s0, v0 :: v_dual_mul_f32 v1, s1, v1 -; GFX1100-GISEL-NEXT: v_dual_mul_f32 v2, s2, v2 :: v_dual_mul_f32 v3, s3, v3 -; GFX1100-GISEL-NEXT: s_load_b64 s[0:1], s[4:5], 0x24 -; GFX1100-GISEL-NEXT: s_delay_alu instid0(VALU_DEP_2) | instskip(NEXT) | instid1(VALU_DEP_2) -; GFX1100-GISEL-NEXT: v_log_f32_e32 v0, v0 -; GFX1100-GISEL-NEXT: 
v_log_f32_e32 v1, v1 -; GFX1100-GISEL-NEXT: s_delay_alu instid0(VALU_DEP_1) | instskip(SKIP_4) | instid1(TRANS32_DEP_3) -; GFX1100-GISEL-NEXT: v_log_f32_e32 v2, v2 -; GFX1100-GISEL-NEXT: v_log_f32_e32 v3, v3 +; GFX1100-GISEL-NEXT: v_lshlrev_b32_e32 v2, 5, v2 +; GFX1100-GISEL-NEXT: v_lshlrev_b32_e32 v3, 5, v3 ; GFX1100-GISEL-NEXT: v_cndmask_b32_e64 v9, 0, 0x41b17218, s7 ; GFX1100-GISEL-NEXT: v_cndmask_b32_e64 v14, 0, 0x41b17218, s8 ; GFX1100-GISEL-NEXT: v_cndmask_b32_e64 v15, 0, 0x41b17218, s9 -; GFX1100-GISEL-NEXT: v_dual_mul_f32 v5, 0x3f317217, v0 :: v_dual_mul_f32 v6, 0x3f317217, v1 +; GFX1100-GISEL-NEXT: v_ldexp_f32 v2, s2, v2 +; GFX1100-GISEL-NEXT: v_ldexp_f32 v3, s3, v3 +; GFX1100-GISEL-NEXT: s_delay_alu instid0(VALU_DEP_2) | instskip(SKIP_1) | instid1(VALU_DEP_2) +; GFX1100-GISEL-NEXT: v_log_f32_e32 v2, v2 +; GFX1100-GISEL-NEXT: v_lshlrev_b32_e32 v0, 5, v0 +; GFX1100-GISEL-NEXT: v_log_f32_e32 v3, v3 +; GFX1100-GISEL-NEXT: v_lshlrev_b32_e32 v1, 5, v1 +; GFX1100-GISEL-NEXT: s_waitcnt_depctr 0xfff +; GFX1100-GISEL-NEXT: v_mul_f32_e32 v7, 0x3f317217, v2 +; GFX1100-GISEL-NEXT: v_ldexp_f32 v0, s0, v0 +; GFX1100-GISEL-NEXT: v_mul_f32_e32 v8, 0x3f317217, v3 +; GFX1100-GISEL-NEXT: v_ldexp_f32 v1, s1, v1 +; GFX1100-GISEL-NEXT: s_load_b64 s[0:1], s[4:5], 0x24 +; GFX1100-GISEL-NEXT: v_fma_f32 v12, 0x3f317217, v2, -v7 +; GFX1100-GISEL-NEXT: v_log_f32_e32 v0, v0 +; GFX1100-GISEL-NEXT: v_fma_f32 v13, 0x3f317217, v3, -v8 +; GFX1100-GISEL-NEXT: v_log_f32_e32 v1, v1 +; GFX1100-GISEL-NEXT: s_delay_alu instid0(VALU_DEP_1) | instskip(NEXT) | instid1(VALU_DEP_1) +; GFX1100-GISEL-NEXT: v_dual_fmac_f32 v12, 0x3377d1cf, v2 :: v_dual_fmac_f32 v13, 0x3377d1cf, v3 +; GFX1100-GISEL-NEXT: v_add_f32_e32 v7, v7, v12 ; GFX1100-GISEL-NEXT: s_waitcnt_depctr 0xfff -; GFX1100-GISEL-NEXT: v_dual_mul_f32 v7, 0x3f317217, v2 :: v_dual_mul_f32 v8, 0x3f317217, v3 +; GFX1100-GISEL-NEXT: v_dual_mul_f32 v5, 0x3f317217, v0 :: v_dual_add_f32 v8, v8, v13 +; GFX1100-GISEL-NEXT: v_mul_f32_e32 v6, 0x3f317217, 
v1 ; GFX1100-GISEL-NEXT: v_cmp_gt_f32_e64 vcc_lo, 0x7f800000, |v0| +; GFX1100-GISEL-NEXT: s_delay_alu instid0(VALU_DEP_3) | instskip(NEXT) | instid1(VALU_DEP_3) ; GFX1100-GISEL-NEXT: v_fma_f32 v10, 0x3f317217, v0, -v5 ; GFX1100-GISEL-NEXT: v_fma_f32 v11, 0x3f317217, v1, -v6 -; GFX1100-GISEL-NEXT: s_delay_alu instid0(VALU_DEP_4) | instskip(SKIP_1) | instid1(VALU_DEP_3) -; GFX1100-GISEL-NEXT: v_fma_f32 v12, 0x3f317217, v2, -v7 -; GFX1100-GISEL-NEXT: v_fma_f32 v13, 0x3f317217, v3, -v8 +; GFX1100-GISEL-NEXT: s_delay_alu instid0(VALU_DEP_1) | instskip(NEXT) | instid1(VALU_DEP_1) ; GFX1100-GISEL-NEXT: v_dual_fmac_f32 v10, 0x3377d1cf, v0 :: v_dual_fmac_f32 v11, 0x3377d1cf, v1 -; GFX1100-GISEL-NEXT: s_delay_alu instid0(VALU_DEP_2) | instskip(NEXT) | instid1(VALU_DEP_2) -; GFX1100-GISEL-NEXT: v_dual_fmac_f32 v12, 0x3377d1cf, v2 :: v_dual_fmac_f32 v13, 0x3377d1cf, v3 ; GFX1100-GISEL-NEXT: v_dual_add_f32 v5, v5, v10 :: v_dual_add_f32 v6, v6, v11 -; GFX1100-GISEL-NEXT: s_delay_alu instid0(VALU_DEP_2) | instskip(NEXT) | instid1(VALU_DEP_2) -; GFX1100-GISEL-NEXT: v_dual_add_f32 v7, v7, v12 :: v_dual_add_f32 v8, v8, v13 +; GFX1100-GISEL-NEXT: s_delay_alu instid0(VALU_DEP_1) | instskip(SKIP_1) | instid1(VALU_DEP_3) ; GFX1100-GISEL-NEXT: v_cndmask_b32_e32 v0, v0, v5, vcc_lo ; GFX1100-GISEL-NEXT: v_cmp_gt_f32_e64 vcc_lo, 0x7f800000, |v1| -; GFX1100-GISEL-NEXT: s_delay_alu instid0(VALU_DEP_4) | instskip(SKIP_1) | instid1(VALU_DEP_4) ; GFX1100-GISEL-NEXT: v_cndmask_b32_e32 v1, v1, v6, vcc_lo ; GFX1100-GISEL-NEXT: v_cmp_gt_f32_e64 vcc_lo, 0x7f800000, |v2| -; GFX1100-GISEL-NEXT: v_sub_f32_e32 v0, v0, v4 -; GFX1100-GISEL-NEXT: s_delay_alu instid0(VALU_DEP_3) | instskip(SKIP_2) | instid1(VALU_DEP_2) -; GFX1100-GISEL-NEXT: v_dual_mov_b32 v4, 0 :: v_dual_sub_f32 v1, v1, v9 +; GFX1100-GISEL-NEXT: s_delay_alu instid0(VALU_DEP_4) | instskip(SKIP_3) | instid1(VALU_DEP_3) +; GFX1100-GISEL-NEXT: v_dual_mov_b32 v5, 0 :: v_dual_sub_f32 v0, v0, v4 ; GFX1100-GISEL-NEXT: v_cndmask_b32_e32 v2, v2, v7, 
vcc_lo ; GFX1100-GISEL-NEXT: v_cmp_gt_f32_e64 vcc_lo, 0x7f800000, |v3| -; GFX1100-GISEL-NEXT: v_dual_cndmask_b32 v3, v3, v8 :: v_dual_sub_f32 v2, v2, v14 -; GFX1100-GISEL-NEXT: s_delay_alu instid0(VALU_DEP_1) +; GFX1100-GISEL-NEXT: v_cndmask_b32_e32 v3, v3, v8, vcc_lo +; GFX1100-GISEL-NEXT: v_dual_sub_f32 v1, v1, v9 :: v_dual_sub_f32 v2, v2, v14 +; GFX1100-GISEL-NEXT: s_delay_alu instid0(VALU_DEP_2) ; GFX1100-GISEL-NEXT: v_sub_f32_e32 v3, v3, v15 ; GFX1100-GISEL-NEXT: s_waitcnt lgkmcnt(0) -; GFX1100-GISEL-NEXT: global_store_b128 v4, v[0:3], s[0:1] +; GFX1100-GISEL-NEXT: global_store_b128 v5, v[0:3], s[0:1] ; GFX1100-GISEL-NEXT: s_endpgm ; ; R600-LABEL: s_log_v4f32: @@ -2126,10 +2165,10 @@ define float @v_log_f32(float %in) { ; SI-GISEL: ; %bb.0: ; SI-GISEL-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) ; SI-GISEL-NEXT: v_mov_b32_e32 v1, 0x800000 -; SI-GISEL-NEXT: v_mov_b32_e32 v2, 0x4f800000 ; SI-GISEL-NEXT: v_cmp_lt_f32_e32 vcc, v0, v1 -; SI-GISEL-NEXT: v_cndmask_b32_e32 v1, 1.0, v2, vcc -; SI-GISEL-NEXT: v_mul_f32_e32 v0, v0, v1 +; SI-GISEL-NEXT: v_cndmask_b32_e64 v1, 0, 1, vcc +; SI-GISEL-NEXT: v_lshlrev_b32_e32 v1, 5, v1 +; SI-GISEL-NEXT: v_ldexp_f32_e32 v0, v0, v1 ; SI-GISEL-NEXT: v_log_f32_e32 v0, v0 ; SI-GISEL-NEXT: v_mov_b32_e32 v1, 0x3f317217 ; SI-GISEL-NEXT: v_mov_b32_e32 v3, 0x3377d1cf @@ -2175,16 +2214,16 @@ define float @v_log_f32(float %in) { ; VI-GISEL: ; %bb.0: ; VI-GISEL-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) ; VI-GISEL-NEXT: v_mov_b32_e32 v1, 0x800000 -; VI-GISEL-NEXT: v_mov_b32_e32 v2, 0x4f800000 ; VI-GISEL-NEXT: v_cmp_lt_f32_e32 vcc, v0, v1 -; VI-GISEL-NEXT: v_cndmask_b32_e32 v1, 1.0, v2, vcc -; VI-GISEL-NEXT: v_mul_f32_e32 v0, v0, v1 +; VI-GISEL-NEXT: v_cndmask_b32_e64 v1, 0, 1, vcc +; VI-GISEL-NEXT: v_lshlrev_b32_e32 v1, 5, v1 +; VI-GISEL-NEXT: v_ldexp_f32 v0, v0, v1 ; VI-GISEL-NEXT: v_log_f32_e32 v0, v0 ; VI-GISEL-NEXT: v_and_b32_e32 v1, 0xfffff000, v0 ; VI-GISEL-NEXT: v_sub_f32_e32 v2, v0, v1 -; VI-GISEL-NEXT: v_mul_f32_e32 v3, 
0x3805fdf4, v2 -; VI-GISEL-NEXT: v_mul_f32_e32 v4, 0x3805fdf4, v1 -; VI-GISEL-NEXT: v_add_f32_e32 v3, v4, v3 +; VI-GISEL-NEXT: v_mul_f32_e32 v3, 0x3805fdf4, v1 +; VI-GISEL-NEXT: v_mul_f32_e32 v4, 0x3805fdf4, v2 +; VI-GISEL-NEXT: v_add_f32_e32 v3, v3, v4 ; VI-GISEL-NEXT: v_mul_f32_e32 v2, 0x3f317000, v2 ; VI-GISEL-NEXT: v_add_f32_e32 v2, v2, v3 ; VI-GISEL-NEXT: v_mul_f32_e32 v1, 0x3f317000, v1 @@ -2224,10 +2263,10 @@ define float @v_log_f32(float %in) { ; GFX900-GISEL: ; %bb.0: ; GFX900-GISEL-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) ; GFX900-GISEL-NEXT: v_mov_b32_e32 v1, 0x800000 -; GFX900-GISEL-NEXT: v_mov_b32_e32 v2, 0x4f800000 ; GFX900-GISEL-NEXT: v_cmp_lt_f32_e32 vcc, v0, v1 -; GFX900-GISEL-NEXT: v_cndmask_b32_e32 v1, 1.0, v2, vcc -; GFX900-GISEL-NEXT: v_mul_f32_e32 v0, v0, v1 +; GFX900-GISEL-NEXT: v_cndmask_b32_e64 v1, 0, 1, vcc +; GFX900-GISEL-NEXT: v_lshlrev_b32_e32 v1, 5, v1 +; GFX900-GISEL-NEXT: v_ldexp_f32 v0, v0, v1 ; GFX900-GISEL-NEXT: v_log_f32_e32 v0, v0 ; GFX900-GISEL-NEXT: v_mov_b32_e32 v1, 0x3f317217 ; GFX900-GISEL-NEXT: v_mov_b32_e32 v3, 0x3377d1cf @@ -2270,21 +2309,22 @@ define float @v_log_f32(float %in) { ; GFX1100-GISEL: ; %bb.0: ; GFX1100-GISEL-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) ; GFX1100-GISEL-NEXT: v_cmp_gt_f32_e32 vcc_lo, 0x800000, v0 -; GFX1100-GISEL-NEXT: v_cndmask_b32_e64 v1, 1.0, 0x4f800000, vcc_lo +; GFX1100-GISEL-NEXT: v_cndmask_b32_e64 v1, 0, 1, vcc_lo ; GFX1100-GISEL-NEXT: s_delay_alu instid0(VALU_DEP_1) | instskip(NEXT) | instid1(VALU_DEP_1) -; GFX1100-GISEL-NEXT: v_mul_f32_e32 v0, v0, v1 +; GFX1100-GISEL-NEXT: v_lshlrev_b32_e32 v1, 5, v1 +; GFX1100-GISEL-NEXT: v_ldexp_f32 v0, v0, v1 +; GFX1100-GISEL-NEXT: s_delay_alu instid0(VALU_DEP_1) | instskip(SKIP_3) | instid1(VALU_DEP_2) ; GFX1100-GISEL-NEXT: v_log_f32_e32 v0, v0 ; GFX1100-GISEL-NEXT: s_waitcnt_depctr 0xfff ; GFX1100-GISEL-NEXT: v_mul_f32_e32 v1, 0x3f317217, v0 ; GFX1100-GISEL-NEXT: v_cmp_gt_f32_e64 s0, 0x7f800000, |v0| -; GFX1100-GISEL-NEXT: s_delay_alu 
instid0(VALU_DEP_2) | instskip(NEXT) | instid1(VALU_DEP_1) ; GFX1100-GISEL-NEXT: v_fma_f32 v2, 0x3f317217, v0, -v1 -; GFX1100-GISEL-NEXT: v_fmac_f32_e32 v2, 0x3377d1cf, v0 ; GFX1100-GISEL-NEXT: s_delay_alu instid0(VALU_DEP_1) | instskip(NEXT) | instid1(VALU_DEP_1) +; GFX1100-GISEL-NEXT: v_fmac_f32_e32 v2, 0x3377d1cf, v0 ; GFX1100-GISEL-NEXT: v_add_f32_e32 v1, v1, v2 +; GFX1100-GISEL-NEXT: s_delay_alu instid0(VALU_DEP_1) | instskip(SKIP_1) | instid1(VALU_DEP_1) ; GFX1100-GISEL-NEXT: v_cndmask_b32_e64 v0, v0, v1, s0 ; GFX1100-GISEL-NEXT: v_cndmask_b32_e64 v1, 0, 0x41b17218, vcc_lo -; GFX1100-GISEL-NEXT: s_delay_alu instid0(VALU_DEP_1) ; GFX1100-GISEL-NEXT: v_sub_f32_e32 v0, v0, v1 ; GFX1100-GISEL-NEXT: s_setpc_b64 s[30:31] ; @@ -2329,10 +2369,10 @@ define float @v_log_fabs_f32(float %in) { ; SI-GISEL: ; %bb.0: ; SI-GISEL-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) ; SI-GISEL-NEXT: v_mov_b32_e32 v1, 0x800000 -; SI-GISEL-NEXT: v_mov_b32_e32 v2, 0x4f800000 ; SI-GISEL-NEXT: v_cmp_lt_f32_e64 vcc, |v0|, v1 -; SI-GISEL-NEXT: v_cndmask_b32_e32 v1, 1.0, v2, vcc -; SI-GISEL-NEXT: v_mul_f32_e64 v0, |v0|, v1 +; SI-GISEL-NEXT: v_cndmask_b32_e64 v1, 0, 1, vcc +; SI-GISEL-NEXT: v_lshlrev_b32_e32 v1, 5, v1 +; SI-GISEL-NEXT: v_ldexp_f32_e64 v0, |v0|, v1 ; SI-GISEL-NEXT: v_log_f32_e32 v0, v0 ; SI-GISEL-NEXT: v_mov_b32_e32 v1, 0x3f317217 ; SI-GISEL-NEXT: v_mov_b32_e32 v3, 0x3377d1cf @@ -2378,16 +2418,16 @@ define float @v_log_fabs_f32(float %in) { ; VI-GISEL: ; %bb.0: ; VI-GISEL-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) ; VI-GISEL-NEXT: v_mov_b32_e32 v1, 0x800000 -; VI-GISEL-NEXT: v_mov_b32_e32 v2, 0x4f800000 ; VI-GISEL-NEXT: v_cmp_lt_f32_e64 vcc, |v0|, v1 -; VI-GISEL-NEXT: v_cndmask_b32_e32 v1, 1.0, v2, vcc -; VI-GISEL-NEXT: v_mul_f32_e64 v0, |v0|, v1 +; VI-GISEL-NEXT: v_cndmask_b32_e64 v1, 0, 1, vcc +; VI-GISEL-NEXT: v_lshlrev_b32_e32 v1, 5, v1 +; VI-GISEL-NEXT: v_ldexp_f32 v0, |v0|, v1 ; VI-GISEL-NEXT: v_log_f32_e32 v0, v0 ; VI-GISEL-NEXT: v_and_b32_e32 v1, 0xfffff000, v0 ; 
VI-GISEL-NEXT: v_sub_f32_e32 v2, v0, v1 -; VI-GISEL-NEXT: v_mul_f32_e32 v3, 0x3805fdf4, v2 -; VI-GISEL-NEXT: v_mul_f32_e32 v4, 0x3805fdf4, v1 -; VI-GISEL-NEXT: v_add_f32_e32 v3, v4, v3 +; VI-GISEL-NEXT: v_mul_f32_e32 v3, 0x3805fdf4, v1 +; VI-GISEL-NEXT: v_mul_f32_e32 v4, 0x3805fdf4, v2 +; VI-GISEL-NEXT: v_add_f32_e32 v3, v3, v4 ; VI-GISEL-NEXT: v_mul_f32_e32 v2, 0x3f317000, v2 ; VI-GISEL-NEXT: v_add_f32_e32 v2, v2, v3 ; VI-GISEL-NEXT: v_mul_f32_e32 v1, 0x3f317000, v1 @@ -2427,10 +2467,10 @@ define float @v_log_fabs_f32(float %in) { ; GFX900-GISEL: ; %bb.0: ; GFX900-GISEL-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) ; GFX900-GISEL-NEXT: v_mov_b32_e32 v1, 0x800000 -; GFX900-GISEL-NEXT: v_mov_b32_e32 v2, 0x4f800000 ; GFX900-GISEL-NEXT: v_cmp_lt_f32_e64 vcc, |v0|, v1 -; GFX900-GISEL-NEXT: v_cndmask_b32_e32 v1, 1.0, v2, vcc -; GFX900-GISEL-NEXT: v_mul_f32_e64 v0, |v0|, v1 +; GFX900-GISEL-NEXT: v_cndmask_b32_e64 v1, 0, 1, vcc +; GFX900-GISEL-NEXT: v_lshlrev_b32_e32 v1, 5, v1 +; GFX900-GISEL-NEXT: v_ldexp_f32 v0, |v0|, v1 ; GFX900-GISEL-NEXT: v_log_f32_e32 v0, v0 ; GFX900-GISEL-NEXT: v_mov_b32_e32 v1, 0x3f317217 ; GFX900-GISEL-NEXT: v_mov_b32_e32 v3, 0x3377d1cf @@ -2475,20 +2515,22 @@ define float @v_log_fabs_f32(float %in) { ; GFX1100-GISEL-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) ; GFX1100-GISEL-NEXT: v_cmp_gt_f32_e64 s0, 0x800000, |v0| ; GFX1100-GISEL-NEXT: s_delay_alu instid0(VALU_DEP_1) | instskip(NEXT) | instid1(VALU_DEP_1) -; GFX1100-GISEL-NEXT: v_cndmask_b32_e64 v1, 1.0, 0x4f800000, s0 -; GFX1100-GISEL-NEXT: v_mul_f32_e64 v0, |v0|, v1 -; GFX1100-GISEL-NEXT: s_delay_alu instid0(VALU_DEP_1) | instskip(SKIP_3) | instid1(VALU_DEP_2) +; GFX1100-GISEL-NEXT: v_cndmask_b32_e64 v1, 0, 1, s0 +; GFX1100-GISEL-NEXT: v_lshlrev_b32_e32 v1, 5, v1 +; GFX1100-GISEL-NEXT: s_delay_alu instid0(VALU_DEP_1) | instskip(NEXT) | instid1(VALU_DEP_1) +; GFX1100-GISEL-NEXT: v_ldexp_f32 v0, |v0|, v1 ; GFX1100-GISEL-NEXT: v_log_f32_e32 v0, v0 ; GFX1100-GISEL-NEXT: s_waitcnt_depctr 0xfff 
; GFX1100-GISEL-NEXT: v_mul_f32_e32 v1, 0x3f317217, v0 ; GFX1100-GISEL-NEXT: v_cmp_gt_f32_e64 vcc_lo, 0x7f800000, |v0| +; GFX1100-GISEL-NEXT: s_delay_alu instid0(VALU_DEP_2) | instskip(NEXT) | instid1(VALU_DEP_1) ; GFX1100-GISEL-NEXT: v_fma_f32 v2, 0x3f317217, v0, -v1 -; GFX1100-GISEL-NEXT: s_delay_alu instid0(VALU_DEP_1) | instskip(NEXT) | instid1(VALU_DEP_1) ; GFX1100-GISEL-NEXT: v_fmac_f32_e32 v2, 0x3377d1cf, v0 +; GFX1100-GISEL-NEXT: s_delay_alu instid0(VALU_DEP_1) | instskip(NEXT) | instid1(VALU_DEP_1) ; GFX1100-GISEL-NEXT: v_add_f32_e32 v1, v1, v2 -; GFX1100-GISEL-NEXT: s_delay_alu instid0(VALU_DEP_1) | instskip(SKIP_1) | instid1(VALU_DEP_1) ; GFX1100-GISEL-NEXT: v_cndmask_b32_e32 v0, v0, v1, vcc_lo ; GFX1100-GISEL-NEXT: v_cndmask_b32_e64 v1, 0, 0x41b17218, s0 +; GFX1100-GISEL-NEXT: s_delay_alu instid0(VALU_DEP_1) ; GFX1100-GISEL-NEXT: v_sub_f32_e32 v0, v0, v1 ; GFX1100-GISEL-NEXT: s_setpc_b64 s[30:31] ; @@ -2534,10 +2576,10 @@ define float @v_log_fneg_fabs_f32(float %in) { ; SI-GISEL: ; %bb.0: ; SI-GISEL-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) ; SI-GISEL-NEXT: v_mov_b32_e32 v1, 0x800000 -; SI-GISEL-NEXT: v_mov_b32_e32 v2, 0x4f800000 ; SI-GISEL-NEXT: v_cmp_lt_f32_e64 vcc, -|v0|, v1 -; SI-GISEL-NEXT: v_cndmask_b32_e32 v1, 1.0, v2, vcc -; SI-GISEL-NEXT: v_mul_f32_e64 v0, -|v0|, v1 +; SI-GISEL-NEXT: v_cndmask_b32_e64 v1, 0, 1, vcc +; SI-GISEL-NEXT: v_lshlrev_b32_e32 v1, 5, v1 +; SI-GISEL-NEXT: v_ldexp_f32_e64 v0, -|v0|, v1 ; SI-GISEL-NEXT: v_log_f32_e32 v0, v0 ; SI-GISEL-NEXT: v_mov_b32_e32 v1, 0x3f317217 ; SI-GISEL-NEXT: v_mov_b32_e32 v3, 0x3377d1cf @@ -2583,16 +2625,16 @@ define float @v_log_fneg_fabs_f32(float %in) { ; VI-GISEL: ; %bb.0: ; VI-GISEL-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) ; VI-GISEL-NEXT: v_mov_b32_e32 v1, 0x800000 -; VI-GISEL-NEXT: v_mov_b32_e32 v2, 0x4f800000 ; VI-GISEL-NEXT: v_cmp_lt_f32_e64 vcc, -|v0|, v1 -; VI-GISEL-NEXT: v_cndmask_b32_e32 v1, 1.0, v2, vcc -; VI-GISEL-NEXT: v_mul_f32_e64 v0, -|v0|, v1 +; VI-GISEL-NEXT: 
v_cndmask_b32_e64 v1, 0, 1, vcc +; VI-GISEL-NEXT: v_lshlrev_b32_e32 v1, 5, v1 +; VI-GISEL-NEXT: v_ldexp_f32 v0, -|v0|, v1 ; VI-GISEL-NEXT: v_log_f32_e32 v0, v0 ; VI-GISEL-NEXT: v_and_b32_e32 v1, 0xfffff000, v0 ; VI-GISEL-NEXT: v_sub_f32_e32 v2, v0, v1 -; VI-GISEL-NEXT: v_mul_f32_e32 v3, 0x3805fdf4, v2 -; VI-GISEL-NEXT: v_mul_f32_e32 v4, 0x3805fdf4, v1 -; VI-GISEL-NEXT: v_add_f32_e32 v3, v4, v3 +; VI-GISEL-NEXT: v_mul_f32_e32 v3, 0x3805fdf4, v1 +; VI-GISEL-NEXT: v_mul_f32_e32 v4, 0x3805fdf4, v2 +; VI-GISEL-NEXT: v_add_f32_e32 v3, v3, v4 ; VI-GISEL-NEXT: v_mul_f32_e32 v2, 0x3f317000, v2 ; VI-GISEL-NEXT: v_add_f32_e32 v2, v2, v3 ; VI-GISEL-NEXT: v_mul_f32_e32 v1, 0x3f317000, v1 @@ -2632,10 +2674,10 @@ define float @v_log_fneg_fabs_f32(float %in) { ; GFX900-GISEL: ; %bb.0: ; GFX900-GISEL-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) ; GFX900-GISEL-NEXT: v_mov_b32_e32 v1, 0x800000 -; GFX900-GISEL-NEXT: v_mov_b32_e32 v2, 0x4f800000 ; GFX900-GISEL-NEXT: v_cmp_lt_f32_e64 vcc, -|v0|, v1 -; GFX900-GISEL-NEXT: v_cndmask_b32_e32 v1, 1.0, v2, vcc -; GFX900-GISEL-NEXT: v_mul_f32_e64 v0, -|v0|, v1 +; GFX900-GISEL-NEXT: v_cndmask_b32_e64 v1, 0, 1, vcc +; GFX900-GISEL-NEXT: v_lshlrev_b32_e32 v1, 5, v1 +; GFX900-GISEL-NEXT: v_ldexp_f32 v0, -|v0|, v1 ; GFX900-GISEL-NEXT: v_log_f32_e32 v0, v0 ; GFX900-GISEL-NEXT: v_mov_b32_e32 v1, 0x3f317217 ; GFX900-GISEL-NEXT: v_mov_b32_e32 v3, 0x3377d1cf @@ -2680,20 +2722,22 @@ define float @v_log_fneg_fabs_f32(float %in) { ; GFX1100-GISEL-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) ; GFX1100-GISEL-NEXT: v_cmp_gt_f32_e64 s0, 0x800000, -|v0| ; GFX1100-GISEL-NEXT: s_delay_alu instid0(VALU_DEP_1) | instskip(NEXT) | instid1(VALU_DEP_1) -; GFX1100-GISEL-NEXT: v_cndmask_b32_e64 v1, 1.0, 0x4f800000, s0 -; GFX1100-GISEL-NEXT: v_mul_f32_e64 v0, -|v0|, v1 -; GFX1100-GISEL-NEXT: s_delay_alu instid0(VALU_DEP_1) | instskip(SKIP_3) | instid1(VALU_DEP_2) +; GFX1100-GISEL-NEXT: v_cndmask_b32_e64 v1, 0, 1, s0 +; GFX1100-GISEL-NEXT: v_lshlrev_b32_e32 v1, 5, v1 +; 
GFX1100-GISEL-NEXT: s_delay_alu instid0(VALU_DEP_1) | instskip(NEXT) | instid1(VALU_DEP_1) +; GFX1100-GISEL-NEXT: v_ldexp_f32 v0, -|v0|, v1 ; GFX1100-GISEL-NEXT: v_log_f32_e32 v0, v0 ; GFX1100-GISEL-NEXT: s_waitcnt_depctr 0xfff ; GFX1100-GISEL-NEXT: v_mul_f32_e32 v1, 0x3f317217, v0 ; GFX1100-GISEL-NEXT: v_cmp_gt_f32_e64 vcc_lo, 0x7f800000, |v0| +; GFX1100-GISEL-NEXT: s_delay_alu instid0(VALU_DEP_2) | instskip(NEXT) | instid1(VALU_DEP_1) ; GFX1100-GISEL-NEXT: v_fma_f32 v2, 0x3f317217, v0, -v1 -; GFX1100-GISEL-NEXT: s_delay_alu instid0(VALU_DEP_1) | instskip(NEXT) | instid1(VALU_DEP_1) ; GFX1100-GISEL-NEXT: v_fmac_f32_e32 v2, 0x3377d1cf, v0 +; GFX1100-GISEL-NEXT: s_delay_alu instid0(VALU_DEP_1) | instskip(NEXT) | instid1(VALU_DEP_1) ; GFX1100-GISEL-NEXT: v_add_f32_e32 v1, v1, v2 -; GFX1100-GISEL-NEXT: s_delay_alu instid0(VALU_DEP_1) | instskip(SKIP_1) | instid1(VALU_DEP_1) ; GFX1100-GISEL-NEXT: v_cndmask_b32_e32 v0, v0, v1, vcc_lo ; GFX1100-GISEL-NEXT: v_cndmask_b32_e64 v1, 0, 0x41b17218, s0 +; GFX1100-GISEL-NEXT: s_delay_alu instid0(VALU_DEP_1) ; GFX1100-GISEL-NEXT: v_sub_f32_e32 v0, v0, v1 ; GFX1100-GISEL-NEXT: s_setpc_b64 s[30:31] ; @@ -2740,10 +2784,10 @@ define float @v_log_fneg_f32(float %in) { ; SI-GISEL: ; %bb.0: ; SI-GISEL-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) ; SI-GISEL-NEXT: v_mov_b32_e32 v1, 0x800000 -; SI-GISEL-NEXT: v_mov_b32_e32 v2, 0x4f800000 ; SI-GISEL-NEXT: v_cmp_lt_f32_e64 vcc, -v0, v1 -; SI-GISEL-NEXT: v_cndmask_b32_e32 v1, 1.0, v2, vcc -; SI-GISEL-NEXT: v_mul_f32_e64 v0, -v0, v1 +; SI-GISEL-NEXT: v_cndmask_b32_e64 v1, 0, 1, vcc +; SI-GISEL-NEXT: v_lshlrev_b32_e32 v1, 5, v1 +; SI-GISEL-NEXT: v_ldexp_f32_e64 v0, -v0, v1 ; SI-GISEL-NEXT: v_log_f32_e32 v0, v0 ; SI-GISEL-NEXT: v_mov_b32_e32 v1, 0x3f317217 ; SI-GISEL-NEXT: v_mov_b32_e32 v3, 0x3377d1cf @@ -2789,16 +2833,16 @@ define float @v_log_fneg_f32(float %in) { ; VI-GISEL: ; %bb.0: ; VI-GISEL-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) ; VI-GISEL-NEXT: v_mov_b32_e32 v1, 0x800000 -; 
VI-GISEL-NEXT: v_mov_b32_e32 v2, 0x4f800000 ; VI-GISEL-NEXT: v_cmp_lt_f32_e64 vcc, -v0, v1 -; VI-GISEL-NEXT: v_cndmask_b32_e32 v1, 1.0, v2, vcc -; VI-GISEL-NEXT: v_mul_f32_e64 v0, -v0, v1 +; VI-GISEL-NEXT: v_cndmask_b32_e64 v1, 0, 1, vcc +; VI-GISEL-NEXT: v_lshlrev_b32_e32 v1, 5, v1 +; VI-GISEL-NEXT: v_ldexp_f32 v0, -v0, v1 ; VI-GISEL-NEXT: v_log_f32_e32 v0, v0 ; VI-GISEL-NEXT: v_and_b32_e32 v1, 0xfffff000, v0 ; VI-GISEL-NEXT: v_sub_f32_e32 v2, v0, v1 -; VI-GISEL-NEXT: v_mul_f32_e32 v3, 0x3805fdf4, v2 -; VI-GISEL-NEXT: v_mul_f32_e32 v4, 0x3805fdf4, v1 -; VI-GISEL-NEXT: v_add_f32_e32 v3, v4, v3 +; VI-GISEL-NEXT: v_mul_f32_e32 v3, 0x3805fdf4, v1 +; VI-GISEL-NEXT: v_mul_f32_e32 v4, 0x3805fdf4, v2 +; VI-GISEL-NEXT: v_add_f32_e32 v3, v3, v4 ; VI-GISEL-NEXT: v_mul_f32_e32 v2, 0x3f317000, v2 ; VI-GISEL-NEXT: v_add_f32_e32 v2, v2, v3 ; VI-GISEL-NEXT: v_mul_f32_e32 v1, 0x3f317000, v1 @@ -2838,10 +2882,10 @@ define float @v_log_fneg_f32(float %in) { ; GFX900-GISEL: ; %bb.0: ; GFX900-GISEL-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) ; GFX900-GISEL-NEXT: v_mov_b32_e32 v1, 0x800000 -; GFX900-GISEL-NEXT: v_mov_b32_e32 v2, 0x4f800000 ; GFX900-GISEL-NEXT: v_cmp_lt_f32_e64 vcc, -v0, v1 -; GFX900-GISEL-NEXT: v_cndmask_b32_e32 v1, 1.0, v2, vcc -; GFX900-GISEL-NEXT: v_mul_f32_e64 v0, -v0, v1 +; GFX900-GISEL-NEXT: v_cndmask_b32_e64 v1, 0, 1, vcc +; GFX900-GISEL-NEXT: v_lshlrev_b32_e32 v1, 5, v1 +; GFX900-GISEL-NEXT: v_ldexp_f32 v0, -v0, v1 ; GFX900-GISEL-NEXT: v_log_f32_e32 v0, v0 ; GFX900-GISEL-NEXT: v_mov_b32_e32 v1, 0x3f317217 ; GFX900-GISEL-NEXT: v_mov_b32_e32 v3, 0x3377d1cf @@ -2885,20 +2929,22 @@ define float @v_log_fneg_f32(float %in) { ; GFX1100-GISEL-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) ; GFX1100-GISEL-NEXT: v_cmp_gt_f32_e64 s0, 0x800000, -v0 ; GFX1100-GISEL-NEXT: s_delay_alu instid0(VALU_DEP_1) | instskip(NEXT) | instid1(VALU_DEP_1) -; GFX1100-GISEL-NEXT: v_cndmask_b32_e64 v1, 1.0, 0x4f800000, s0 -; GFX1100-GISEL-NEXT: v_mul_f32_e64 v0, -v0, v1 -; 
GFX1100-GISEL-NEXT: s_delay_alu instid0(VALU_DEP_1) | instskip(SKIP_3) | instid1(VALU_DEP_2) +; GFX1100-GISEL-NEXT: v_cndmask_b32_e64 v1, 0, 1, s0 +; GFX1100-GISEL-NEXT: v_lshlrev_b32_e32 v1, 5, v1 +; GFX1100-GISEL-NEXT: s_delay_alu instid0(VALU_DEP_1) | instskip(NEXT) | instid1(VALU_DEP_1) +; GFX1100-GISEL-NEXT: v_ldexp_f32 v0, -v0, v1 ; GFX1100-GISEL-NEXT: v_log_f32_e32 v0, v0 ; GFX1100-GISEL-NEXT: s_waitcnt_depctr 0xfff ; GFX1100-GISEL-NEXT: v_mul_f32_e32 v1, 0x3f317217, v0 ; GFX1100-GISEL-NEXT: v_cmp_gt_f32_e64 vcc_lo, 0x7f800000, |v0| +; GFX1100-GISEL-NEXT: s_delay_alu instid0(VALU_DEP_2) | instskip(NEXT) | instid1(VALU_DEP_1) ; GFX1100-GISEL-NEXT: v_fma_f32 v2, 0x3f317217, v0, -v1 -; GFX1100-GISEL-NEXT: s_delay_alu instid0(VALU_DEP_1) | instskip(NEXT) | instid1(VALU_DEP_1) ; GFX1100-GISEL-NEXT: v_fmac_f32_e32 v2, 0x3377d1cf, v0 +; GFX1100-GISEL-NEXT: s_delay_alu instid0(VALU_DEP_1) | instskip(NEXT) | instid1(VALU_DEP_1) ; GFX1100-GISEL-NEXT: v_add_f32_e32 v1, v1, v2 -; GFX1100-GISEL-NEXT: s_delay_alu instid0(VALU_DEP_1) | instskip(SKIP_1) | instid1(VALU_DEP_1) ; GFX1100-GISEL-NEXT: v_cndmask_b32_e32 v0, v0, v1, vcc_lo ; GFX1100-GISEL-NEXT: v_cndmask_b32_e64 v1, 0, 0x41b17218, s0 +; GFX1100-GISEL-NEXT: s_delay_alu instid0(VALU_DEP_1) ; GFX1100-GISEL-NEXT: v_sub_f32_e32 v0, v0, v1 ; GFX1100-GISEL-NEXT: s_setpc_b64 s[30:31] ; @@ -3304,10 +3350,10 @@ define float @v_log_f32_ninf(float %in) { ; SI-GISEL: ; %bb.0: ; SI-GISEL-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) ; SI-GISEL-NEXT: v_mov_b32_e32 v1, 0x800000 -; SI-GISEL-NEXT: v_mov_b32_e32 v2, 0x4f800000 ; SI-GISEL-NEXT: v_cmp_lt_f32_e32 vcc, v0, v1 -; SI-GISEL-NEXT: v_cndmask_b32_e32 v1, 1.0, v2, vcc -; SI-GISEL-NEXT: v_mul_f32_e32 v0, v0, v1 +; SI-GISEL-NEXT: v_cndmask_b32_e64 v1, 0, 1, vcc +; SI-GISEL-NEXT: v_lshlrev_b32_e32 v1, 5, v1 +; SI-GISEL-NEXT: v_ldexp_f32_e32 v0, v0, v1 ; SI-GISEL-NEXT: v_log_f32_e32 v0, v0 ; SI-GISEL-NEXT: v_mov_b32_e32 v1, 0x3f317217 ; SI-GISEL-NEXT: v_mov_b32_e32 v3, 0x3377d1cf 
@@ -3353,16 +3399,16 @@ define float @v_log_f32_ninf(float %in) { ; VI-GISEL: ; %bb.0: ; VI-GISEL-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) ; VI-GISEL-NEXT: v_mov_b32_e32 v1, 0x800000 -; VI-GISEL-NEXT: v_mov_b32_e32 v2, 0x4f800000 ; VI-GISEL-NEXT: v_cmp_lt_f32_e32 vcc, v0, v1 -; VI-GISEL-NEXT: v_cndmask_b32_e32 v1, 1.0, v2, vcc -; VI-GISEL-NEXT: v_mul_f32_e32 v0, v0, v1 +; VI-GISEL-NEXT: v_cndmask_b32_e64 v1, 0, 1, vcc +; VI-GISEL-NEXT: v_lshlrev_b32_e32 v1, 5, v1 +; VI-GISEL-NEXT: v_ldexp_f32 v0, v0, v1 ; VI-GISEL-NEXT: v_log_f32_e32 v0, v0 ; VI-GISEL-NEXT: v_and_b32_e32 v1, 0xfffff000, v0 ; VI-GISEL-NEXT: v_sub_f32_e32 v2, v0, v1 -; VI-GISEL-NEXT: v_mul_f32_e32 v3, 0x3805fdf4, v2 -; VI-GISEL-NEXT: v_mul_f32_e32 v4, 0x3805fdf4, v1 -; VI-GISEL-NEXT: v_add_f32_e32 v3, v4, v3 +; VI-GISEL-NEXT: v_mul_f32_e32 v3, 0x3805fdf4, v1 +; VI-GISEL-NEXT: v_mul_f32_e32 v4, 0x3805fdf4, v2 +; VI-GISEL-NEXT: v_add_f32_e32 v3, v3, v4 ; VI-GISEL-NEXT: v_mul_f32_e32 v2, 0x3f317000, v2 ; VI-GISEL-NEXT: v_add_f32_e32 v2, v2, v3 ; VI-GISEL-NEXT: v_mul_f32_e32 v1, 0x3f317000, v1 @@ -3402,10 +3448,10 @@ define float @v_log_f32_ninf(float %in) { ; GFX900-GISEL: ; %bb.0: ; GFX900-GISEL-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) ; GFX900-GISEL-NEXT: v_mov_b32_e32 v1, 0x800000 -; GFX900-GISEL-NEXT: v_mov_b32_e32 v2, 0x4f800000 ; GFX900-GISEL-NEXT: v_cmp_lt_f32_e32 vcc, v0, v1 -; GFX900-GISEL-NEXT: v_cndmask_b32_e32 v1, 1.0, v2, vcc -; GFX900-GISEL-NEXT: v_mul_f32_e32 v0, v0, v1 +; GFX900-GISEL-NEXT: v_cndmask_b32_e64 v1, 0, 1, vcc +; GFX900-GISEL-NEXT: v_lshlrev_b32_e32 v1, 5, v1 +; GFX900-GISEL-NEXT: v_ldexp_f32 v0, v0, v1 ; GFX900-GISEL-NEXT: v_log_f32_e32 v0, v0 ; GFX900-GISEL-NEXT: v_mov_b32_e32 v1, 0x3f317217 ; GFX900-GISEL-NEXT: v_mov_b32_e32 v3, 0x3377d1cf @@ -3448,21 +3494,22 @@ define float @v_log_f32_ninf(float %in) { ; GFX1100-GISEL: ; %bb.0: ; GFX1100-GISEL-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) ; GFX1100-GISEL-NEXT: v_cmp_gt_f32_e32 vcc_lo, 0x800000, v0 -; 
GFX1100-GISEL-NEXT: v_cndmask_b32_e64 v1, 1.0, 0x4f800000, vcc_lo +; GFX1100-GISEL-NEXT: v_cndmask_b32_e64 v1, 0, 1, vcc_lo ; GFX1100-GISEL-NEXT: s_delay_alu instid0(VALU_DEP_1) | instskip(NEXT) | instid1(VALU_DEP_1) -; GFX1100-GISEL-NEXT: v_mul_f32_e32 v0, v0, v1 +; GFX1100-GISEL-NEXT: v_lshlrev_b32_e32 v1, 5, v1 +; GFX1100-GISEL-NEXT: v_ldexp_f32 v0, v0, v1 +; GFX1100-GISEL-NEXT: s_delay_alu instid0(VALU_DEP_1) | instskip(SKIP_3) | instid1(VALU_DEP_2) ; GFX1100-GISEL-NEXT: v_log_f32_e32 v0, v0 ; GFX1100-GISEL-NEXT: s_waitcnt_depctr 0xfff ; GFX1100-GISEL-NEXT: v_mul_f32_e32 v1, 0x3f317217, v0 ; GFX1100-GISEL-NEXT: v_cmp_gt_f32_e64 s0, 0x7f800000, |v0| -; GFX1100-GISEL-NEXT: s_delay_alu instid0(VALU_DEP_2) | instskip(NEXT) | instid1(VALU_DEP_1) ; GFX1100-GISEL-NEXT: v_fma_f32 v2, 0x3f317217, v0, -v1 -; GFX1100-GISEL-NEXT: v_fmac_f32_e32 v2, 0x3377d1cf, v0 ; GFX1100-GISEL-NEXT: s_delay_alu instid0(VALU_DEP_1) | instskip(NEXT) | instid1(VALU_DEP_1) +; GFX1100-GISEL-NEXT: v_fmac_f32_e32 v2, 0x3377d1cf, v0 ; GFX1100-GISEL-NEXT: v_add_f32_e32 v1, v1, v2 +; GFX1100-GISEL-NEXT: s_delay_alu instid0(VALU_DEP_1) | instskip(SKIP_1) | instid1(VALU_DEP_1) ; GFX1100-GISEL-NEXT: v_cndmask_b32_e64 v0, v0, v1, s0 ; GFX1100-GISEL-NEXT: v_cndmask_b32_e64 v1, 0, 0x41b17218, vcc_lo -; GFX1100-GISEL-NEXT: s_delay_alu instid0(VALU_DEP_1) ; GFX1100-GISEL-NEXT: v_sub_f32_e32 v0, v0, v1 ; GFX1100-GISEL-NEXT: s_setpc_b64 s[30:31] ; @@ -4038,10 +4085,10 @@ define float @v_log_f32_nnan(float %in) { ; SI-GISEL: ; %bb.0: ; SI-GISEL-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) ; SI-GISEL-NEXT: v_mov_b32_e32 v1, 0x800000 -; SI-GISEL-NEXT: v_mov_b32_e32 v2, 0x4f800000 ; SI-GISEL-NEXT: v_cmp_lt_f32_e32 vcc, v0, v1 -; SI-GISEL-NEXT: v_cndmask_b32_e32 v1, 1.0, v2, vcc -; SI-GISEL-NEXT: v_mul_f32_e32 v0, v0, v1 +; SI-GISEL-NEXT: v_cndmask_b32_e64 v1, 0, 1, vcc +; SI-GISEL-NEXT: v_lshlrev_b32_e32 v1, 5, v1 +; SI-GISEL-NEXT: v_ldexp_f32_e32 v0, v0, v1 ; SI-GISEL-NEXT: v_log_f32_e32 v0, v0 ; 
SI-GISEL-NEXT: v_mov_b32_e32 v1, 0x3f317217 ; SI-GISEL-NEXT: v_mov_b32_e32 v3, 0x3377d1cf @@ -4087,16 +4134,16 @@ define float @v_log_f32_nnan(float %in) { ; VI-GISEL: ; %bb.0: ; VI-GISEL-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) ; VI-GISEL-NEXT: v_mov_b32_e32 v1, 0x800000 -; VI-GISEL-NEXT: v_mov_b32_e32 v2, 0x4f800000 ; VI-GISEL-NEXT: v_cmp_lt_f32_e32 vcc, v0, v1 -; VI-GISEL-NEXT: v_cndmask_b32_e32 v1, 1.0, v2, vcc -; VI-GISEL-NEXT: v_mul_f32_e32 v0, v0, v1 +; VI-GISEL-NEXT: v_cndmask_b32_e64 v1, 0, 1, vcc +; VI-GISEL-NEXT: v_lshlrev_b32_e32 v1, 5, v1 +; VI-GISEL-NEXT: v_ldexp_f32 v0, v0, v1 ; VI-GISEL-NEXT: v_log_f32_e32 v0, v0 ; VI-GISEL-NEXT: v_and_b32_e32 v1, 0xfffff000, v0 ; VI-GISEL-NEXT: v_sub_f32_e32 v2, v0, v1 -; VI-GISEL-NEXT: v_mul_f32_e32 v3, 0x3805fdf4, v2 -; VI-GISEL-NEXT: v_mul_f32_e32 v4, 0x3805fdf4, v1 -; VI-GISEL-NEXT: v_add_f32_e32 v3, v4, v3 +; VI-GISEL-NEXT: v_mul_f32_e32 v3, 0x3805fdf4, v1 +; VI-GISEL-NEXT: v_mul_f32_e32 v4, 0x3805fdf4, v2 +; VI-GISEL-NEXT: v_add_f32_e32 v3, v3, v4 ; VI-GISEL-NEXT: v_mul_f32_e32 v2, 0x3f317000, v2 ; VI-GISEL-NEXT: v_add_f32_e32 v2, v2, v3 ; VI-GISEL-NEXT: v_mul_f32_e32 v1, 0x3f317000, v1 @@ -4136,10 +4183,10 @@ define float @v_log_f32_nnan(float %in) { ; GFX900-GISEL: ; %bb.0: ; GFX900-GISEL-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) ; GFX900-GISEL-NEXT: v_mov_b32_e32 v1, 0x800000 -; GFX900-GISEL-NEXT: v_mov_b32_e32 v2, 0x4f800000 ; GFX900-GISEL-NEXT: v_cmp_lt_f32_e32 vcc, v0, v1 -; GFX900-GISEL-NEXT: v_cndmask_b32_e32 v1, 1.0, v2, vcc -; GFX900-GISEL-NEXT: v_mul_f32_e32 v0, v0, v1 +; GFX900-GISEL-NEXT: v_cndmask_b32_e64 v1, 0, 1, vcc +; GFX900-GISEL-NEXT: v_lshlrev_b32_e32 v1, 5, v1 +; GFX900-GISEL-NEXT: v_ldexp_f32 v0, v0, v1 ; GFX900-GISEL-NEXT: v_log_f32_e32 v0, v0 ; GFX900-GISEL-NEXT: v_mov_b32_e32 v1, 0x3f317217 ; GFX900-GISEL-NEXT: v_mov_b32_e32 v3, 0x3377d1cf @@ -4182,21 +4229,22 @@ define float @v_log_f32_nnan(float %in) { ; GFX1100-GISEL: ; %bb.0: ; GFX1100-GISEL-NEXT: s_waitcnt vmcnt(0) 
expcnt(0) lgkmcnt(0) ; GFX1100-GISEL-NEXT: v_cmp_gt_f32_e32 vcc_lo, 0x800000, v0 -; GFX1100-GISEL-NEXT: v_cndmask_b32_e64 v1, 1.0, 0x4f800000, vcc_lo +; GFX1100-GISEL-NEXT: v_cndmask_b32_e64 v1, 0, 1, vcc_lo ; GFX1100-GISEL-NEXT: s_delay_alu instid0(VALU_DEP_1) | instskip(NEXT) | instid1(VALU_DEP_1) -; GFX1100-GISEL-NEXT: v_mul_f32_e32 v0, v0, v1 +; GFX1100-GISEL-NEXT: v_lshlrev_b32_e32 v1, 5, v1 +; GFX1100-GISEL-NEXT: v_ldexp_f32 v0, v0, v1 +; GFX1100-GISEL-NEXT: s_delay_alu instid0(VALU_DEP_1) | instskip(SKIP_3) | instid1(VALU_DEP_2) ; GFX1100-GISEL-NEXT: v_log_f32_e32 v0, v0 ; GFX1100-GISEL-NEXT: s_waitcnt_depctr 0xfff ; GFX1100-GISEL-NEXT: v_mul_f32_e32 v1, 0x3f317217, v0 ; GFX1100-GISEL-NEXT: v_cmp_gt_f32_e64 s0, 0x7f800000, |v0| -; GFX1100-GISEL-NEXT: s_delay_alu instid0(VALU_DEP_2) | instskip(NEXT) | instid1(VALU_DEP_1) ; GFX1100-GISEL-NEXT: v_fma_f32 v2, 0x3f317217, v0, -v1 -; GFX1100-GISEL-NEXT: v_fmac_f32_e32 v2, 0x3377d1cf, v0 ; GFX1100-GISEL-NEXT: s_delay_alu instid0(VALU_DEP_1) | instskip(NEXT) | instid1(VALU_DEP_1) +; GFX1100-GISEL-NEXT: v_fmac_f32_e32 v2, 0x3377d1cf, v0 ; GFX1100-GISEL-NEXT: v_add_f32_e32 v1, v1, v2 +; GFX1100-GISEL-NEXT: s_delay_alu instid0(VALU_DEP_1) | instskip(SKIP_1) | instid1(VALU_DEP_1) ; GFX1100-GISEL-NEXT: v_cndmask_b32_e64 v0, v0, v1, s0 ; GFX1100-GISEL-NEXT: v_cndmask_b32_e64 v1, 0, 0x41b17218, vcc_lo -; GFX1100-GISEL-NEXT: s_delay_alu instid0(VALU_DEP_1) ; GFX1100-GISEL-NEXT: v_sub_f32_e32 v0, v0, v1 ; GFX1100-GISEL-NEXT: s_setpc_b64 s[30:31] ; @@ -4381,10 +4429,10 @@ define float @v_log_f32_nnan_dynamic(float %in) #1 { ; SI-GISEL: ; %bb.0: ; SI-GISEL-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) ; SI-GISEL-NEXT: v_mov_b32_e32 v1, 0x800000 -; SI-GISEL-NEXT: v_mov_b32_e32 v2, 0x4f800000 ; SI-GISEL-NEXT: v_cmp_lt_f32_e32 vcc, v0, v1 -; SI-GISEL-NEXT: v_cndmask_b32_e32 v1, 1.0, v2, vcc -; SI-GISEL-NEXT: v_mul_f32_e32 v0, v0, v1 +; SI-GISEL-NEXT: v_cndmask_b32_e64 v1, 0, 1, vcc +; SI-GISEL-NEXT: v_lshlrev_b32_e32 v1, 5, v1 +; 
SI-GISEL-NEXT: v_ldexp_f32_e32 v0, v0, v1 ; SI-GISEL-NEXT: v_log_f32_e32 v0, v0 ; SI-GISEL-NEXT: v_mov_b32_e32 v1, 0x3f317217 ; SI-GISEL-NEXT: v_mov_b32_e32 v3, 0x3377d1cf @@ -4430,16 +4478,16 @@ define float @v_log_f32_nnan_dynamic(float %in) #1 { ; VI-GISEL: ; %bb.0: ; VI-GISEL-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) ; VI-GISEL-NEXT: v_mov_b32_e32 v1, 0x800000 -; VI-GISEL-NEXT: v_mov_b32_e32 v2, 0x4f800000 ; VI-GISEL-NEXT: v_cmp_lt_f32_e32 vcc, v0, v1 -; VI-GISEL-NEXT: v_cndmask_b32_e32 v1, 1.0, v2, vcc -; VI-GISEL-NEXT: v_mul_f32_e32 v0, v0, v1 +; VI-GISEL-NEXT: v_cndmask_b32_e64 v1, 0, 1, vcc +; VI-GISEL-NEXT: v_lshlrev_b32_e32 v1, 5, v1 +; VI-GISEL-NEXT: v_ldexp_f32 v0, v0, v1 ; VI-GISEL-NEXT: v_log_f32_e32 v0, v0 ; VI-GISEL-NEXT: v_and_b32_e32 v1, 0xfffff000, v0 ; VI-GISEL-NEXT: v_sub_f32_e32 v2, v0, v1 -; VI-GISEL-NEXT: v_mul_f32_e32 v3, 0x3805fdf4, v2 -; VI-GISEL-NEXT: v_mul_f32_e32 v4, 0x3805fdf4, v1 -; VI-GISEL-NEXT: v_add_f32_e32 v3, v4, v3 +; VI-GISEL-NEXT: v_mul_f32_e32 v3, 0x3805fdf4, v1 +; VI-GISEL-NEXT: v_mul_f32_e32 v4, 0x3805fdf4, v2 +; VI-GISEL-NEXT: v_add_f32_e32 v3, v3, v4 ; VI-GISEL-NEXT: v_mul_f32_e32 v2, 0x3f317000, v2 ; VI-GISEL-NEXT: v_add_f32_e32 v2, v2, v3 ; VI-GISEL-NEXT: v_mul_f32_e32 v1, 0x3f317000, v1 @@ -4479,10 +4527,10 @@ define float @v_log_f32_nnan_dynamic(float %in) #1 { ; GFX900-GISEL: ; %bb.0: ; GFX900-GISEL-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) ; GFX900-GISEL-NEXT: v_mov_b32_e32 v1, 0x800000 -; GFX900-GISEL-NEXT: v_mov_b32_e32 v2, 0x4f800000 ; GFX900-GISEL-NEXT: v_cmp_lt_f32_e32 vcc, v0, v1 -; GFX900-GISEL-NEXT: v_cndmask_b32_e32 v1, 1.0, v2, vcc -; GFX900-GISEL-NEXT: v_mul_f32_e32 v0, v0, v1 +; GFX900-GISEL-NEXT: v_cndmask_b32_e64 v1, 0, 1, vcc +; GFX900-GISEL-NEXT: v_lshlrev_b32_e32 v1, 5, v1 +; GFX900-GISEL-NEXT: v_ldexp_f32 v0, v0, v1 ; GFX900-GISEL-NEXT: v_log_f32_e32 v0, v0 ; GFX900-GISEL-NEXT: v_mov_b32_e32 v1, 0x3f317217 ; GFX900-GISEL-NEXT: v_mov_b32_e32 v3, 0x3377d1cf @@ -4525,21 +4573,22 @@ define 
float @v_log_f32_nnan_dynamic(float %in) #1 { ; GFX1100-GISEL: ; %bb.0: ; GFX1100-GISEL-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) ; GFX1100-GISEL-NEXT: v_cmp_gt_f32_e32 vcc_lo, 0x800000, v0 -; GFX1100-GISEL-NEXT: v_cndmask_b32_e64 v1, 1.0, 0x4f800000, vcc_lo +; GFX1100-GISEL-NEXT: v_cndmask_b32_e64 v1, 0, 1, vcc_lo ; GFX1100-GISEL-NEXT: s_delay_alu instid0(VALU_DEP_1) | instskip(NEXT) | instid1(VALU_DEP_1) -; GFX1100-GISEL-NEXT: v_mul_f32_e32 v0, v0, v1 +; GFX1100-GISEL-NEXT: v_lshlrev_b32_e32 v1, 5, v1 +; GFX1100-GISEL-NEXT: v_ldexp_f32 v0, v0, v1 +; GFX1100-GISEL-NEXT: s_delay_alu instid0(VALU_DEP_1) | instskip(SKIP_3) | instid1(VALU_DEP_2) ; GFX1100-GISEL-NEXT: v_log_f32_e32 v0, v0 ; GFX1100-GISEL-NEXT: s_waitcnt_depctr 0xfff ; GFX1100-GISEL-NEXT: v_mul_f32_e32 v1, 0x3f317217, v0 ; GFX1100-GISEL-NEXT: v_cmp_gt_f32_e64 s0, 0x7f800000, |v0| -; GFX1100-GISEL-NEXT: s_delay_alu instid0(VALU_DEP_2) | instskip(NEXT) | instid1(VALU_DEP_1) ; GFX1100-GISEL-NEXT: v_fma_f32 v2, 0x3f317217, v0, -v1 -; GFX1100-GISEL-NEXT: v_fmac_f32_e32 v2, 0x3377d1cf, v0 ; GFX1100-GISEL-NEXT: s_delay_alu instid0(VALU_DEP_1) | instskip(NEXT) | instid1(VALU_DEP_1) +; GFX1100-GISEL-NEXT: v_fmac_f32_e32 v2, 0x3377d1cf, v0 ; GFX1100-GISEL-NEXT: v_add_f32_e32 v1, v1, v2 +; GFX1100-GISEL-NEXT: s_delay_alu instid0(VALU_DEP_1) | instskip(SKIP_1) | instid1(VALU_DEP_1) ; GFX1100-GISEL-NEXT: v_cndmask_b32_e64 v0, v0, v1, s0 ; GFX1100-GISEL-NEXT: v_cndmask_b32_e64 v1, 0, 0x41b17218, vcc_lo -; GFX1100-GISEL-NEXT: s_delay_alu instid0(VALU_DEP_1) ; GFX1100-GISEL-NEXT: v_sub_f32_e32 v0, v0, v1 ; GFX1100-GISEL-NEXT: s_setpc_b64 s[30:31] ; @@ -4724,10 +4773,10 @@ define float @v_log_f32_ninf_dynamic(float %in) #1 { ; SI-GISEL: ; %bb.0: ; SI-GISEL-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) ; SI-GISEL-NEXT: v_mov_b32_e32 v1, 0x800000 -; SI-GISEL-NEXT: v_mov_b32_e32 v2, 0x4f800000 ; SI-GISEL-NEXT: v_cmp_lt_f32_e32 vcc, v0, v1 -; SI-GISEL-NEXT: v_cndmask_b32_e32 v1, 1.0, v2, vcc -; SI-GISEL-NEXT: 
v_mul_f32_e32 v0, v0, v1 +; SI-GISEL-NEXT: v_cndmask_b32_e64 v1, 0, 1, vcc +; SI-GISEL-NEXT: v_lshlrev_b32_e32 v1, 5, v1 +; SI-GISEL-NEXT: v_ldexp_f32_e32 v0, v0, v1 ; SI-GISEL-NEXT: v_log_f32_e32 v0, v0 ; SI-GISEL-NEXT: v_mov_b32_e32 v1, 0x3f317217 ; SI-GISEL-NEXT: v_mov_b32_e32 v3, 0x3377d1cf @@ -4773,16 +4822,16 @@ define float @v_log_f32_ninf_dynamic(float %in) #1 { ; VI-GISEL: ; %bb.0: ; VI-GISEL-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) ; VI-GISEL-NEXT: v_mov_b32_e32 v1, 0x800000 -; VI-GISEL-NEXT: v_mov_b32_e32 v2, 0x4f800000 ; VI-GISEL-NEXT: v_cmp_lt_f32_e32 vcc, v0, v1 -; VI-GISEL-NEXT: v_cndmask_b32_e32 v1, 1.0, v2, vcc -; VI-GISEL-NEXT: v_mul_f32_e32 v0, v0, v1 +; VI-GISEL-NEXT: v_cndmask_b32_e64 v1, 0, 1, vcc +; VI-GISEL-NEXT: v_lshlrev_b32_e32 v1, 5, v1 +; VI-GISEL-NEXT: v_ldexp_f32 v0, v0, v1 ; VI-GISEL-NEXT: v_log_f32_e32 v0, v0 ; VI-GISEL-NEXT: v_and_b32_e32 v1, 0xfffff000, v0 ; VI-GISEL-NEXT: v_sub_f32_e32 v2, v0, v1 -; VI-GISEL-NEXT: v_mul_f32_e32 v3, 0x3805fdf4, v2 -; VI-GISEL-NEXT: v_mul_f32_e32 v4, 0x3805fdf4, v1 -; VI-GISEL-NEXT: v_add_f32_e32 v3, v4, v3 +; VI-GISEL-NEXT: v_mul_f32_e32 v3, 0x3805fdf4, v1 +; VI-GISEL-NEXT: v_mul_f32_e32 v4, 0x3805fdf4, v2 +; VI-GISEL-NEXT: v_add_f32_e32 v3, v3, v4 ; VI-GISEL-NEXT: v_mul_f32_e32 v2, 0x3f317000, v2 ; VI-GISEL-NEXT: v_add_f32_e32 v2, v2, v3 ; VI-GISEL-NEXT: v_mul_f32_e32 v1, 0x3f317000, v1 @@ -4822,10 +4871,10 @@ define float @v_log_f32_ninf_dynamic(float %in) #1 { ; GFX900-GISEL: ; %bb.0: ; GFX900-GISEL-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) ; GFX900-GISEL-NEXT: v_mov_b32_e32 v1, 0x800000 -; GFX900-GISEL-NEXT: v_mov_b32_e32 v2, 0x4f800000 ; GFX900-GISEL-NEXT: v_cmp_lt_f32_e32 vcc, v0, v1 -; GFX900-GISEL-NEXT: v_cndmask_b32_e32 v1, 1.0, v2, vcc -; GFX900-GISEL-NEXT: v_mul_f32_e32 v0, v0, v1 +; GFX900-GISEL-NEXT: v_cndmask_b32_e64 v1, 0, 1, vcc +; GFX900-GISEL-NEXT: v_lshlrev_b32_e32 v1, 5, v1 +; GFX900-GISEL-NEXT: v_ldexp_f32 v0, v0, v1 ; GFX900-GISEL-NEXT: v_log_f32_e32 v0, v0 ; 
GFX900-GISEL-NEXT: v_mov_b32_e32 v1, 0x3f317217 ; GFX900-GISEL-NEXT: v_mov_b32_e32 v3, 0x3377d1cf @@ -4868,21 +4917,22 @@ define float @v_log_f32_ninf_dynamic(float %in) #1 { ; GFX1100-GISEL: ; %bb.0: ; GFX1100-GISEL-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) ; GFX1100-GISEL-NEXT: v_cmp_gt_f32_e32 vcc_lo, 0x800000, v0 -; GFX1100-GISEL-NEXT: v_cndmask_b32_e64 v1, 1.0, 0x4f800000, vcc_lo +; GFX1100-GISEL-NEXT: v_cndmask_b32_e64 v1, 0, 1, vcc_lo ; GFX1100-GISEL-NEXT: s_delay_alu instid0(VALU_DEP_1) | instskip(NEXT) | instid1(VALU_DEP_1) -; GFX1100-GISEL-NEXT: v_mul_f32_e32 v0, v0, v1 +; GFX1100-GISEL-NEXT: v_lshlrev_b32_e32 v1, 5, v1 +; GFX1100-GISEL-NEXT: v_ldexp_f32 v0, v0, v1 +; GFX1100-GISEL-NEXT: s_delay_alu instid0(VALU_DEP_1) | instskip(SKIP_3) | instid1(VALU_DEP_2) ; GFX1100-GISEL-NEXT: v_log_f32_e32 v0, v0 ; GFX1100-GISEL-NEXT: s_waitcnt_depctr 0xfff ; GFX1100-GISEL-NEXT: v_mul_f32_e32 v1, 0x3f317217, v0 ; GFX1100-GISEL-NEXT: v_cmp_gt_f32_e64 s0, 0x7f800000, |v0| -; GFX1100-GISEL-NEXT: s_delay_alu instid0(VALU_DEP_2) | instskip(NEXT) | instid1(VALU_DEP_1) ; GFX1100-GISEL-NEXT: v_fma_f32 v2, 0x3f317217, v0, -v1 -; GFX1100-GISEL-NEXT: v_fmac_f32_e32 v2, 0x3377d1cf, v0 ; GFX1100-GISEL-NEXT: s_delay_alu instid0(VALU_DEP_1) | instskip(NEXT) | instid1(VALU_DEP_1) +; GFX1100-GISEL-NEXT: v_fmac_f32_e32 v2, 0x3377d1cf, v0 ; GFX1100-GISEL-NEXT: v_add_f32_e32 v1, v1, v2 +; GFX1100-GISEL-NEXT: s_delay_alu instid0(VALU_DEP_1) | instskip(SKIP_1) | instid1(VALU_DEP_1) ; GFX1100-GISEL-NEXT: v_cndmask_b32_e64 v0, v0, v1, s0 ; GFX1100-GISEL-NEXT: v_cndmask_b32_e64 v1, 0, 0x41b17218, vcc_lo -; GFX1100-GISEL-NEXT: s_delay_alu instid0(VALU_DEP_1) ; GFX1100-GISEL-NEXT: v_sub_f32_e32 v0, v0, v1 ; GFX1100-GISEL-NEXT: s_setpc_b64 s[30:31] ; @@ -4924,10 +4974,10 @@ define float @v_log_f32_nnan_ninf(float %in) { ; SI-GISEL: ; %bb.0: ; SI-GISEL-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) ; SI-GISEL-NEXT: v_mov_b32_e32 v1, 0x800000 -; SI-GISEL-NEXT: v_mov_b32_e32 v2, 0x4f800000 ; 
SI-GISEL-NEXT: v_cmp_lt_f32_e32 vcc, v0, v1 -; SI-GISEL-NEXT: v_cndmask_b32_e32 v1, 1.0, v2, vcc -; SI-GISEL-NEXT: v_mul_f32_e32 v0, v0, v1 +; SI-GISEL-NEXT: v_cndmask_b32_e64 v1, 0, 1, vcc +; SI-GISEL-NEXT: v_lshlrev_b32_e32 v1, 5, v1 +; SI-GISEL-NEXT: v_ldexp_f32_e32 v0, v0, v1 ; SI-GISEL-NEXT: v_log_f32_e32 v0, v0 ; SI-GISEL-NEXT: v_mov_b32_e32 v1, 0x3f317217 ; SI-GISEL-NEXT: v_mov_b32_e32 v3, 0x3377d1cf @@ -4967,16 +5017,16 @@ define float @v_log_f32_nnan_ninf(float %in) { ; VI-GISEL: ; %bb.0: ; VI-GISEL-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) ; VI-GISEL-NEXT: v_mov_b32_e32 v1, 0x800000 -; VI-GISEL-NEXT: v_mov_b32_e32 v2, 0x4f800000 ; VI-GISEL-NEXT: v_cmp_lt_f32_e32 vcc, v0, v1 -; VI-GISEL-NEXT: v_cndmask_b32_e32 v1, 1.0, v2, vcc -; VI-GISEL-NEXT: v_mul_f32_e32 v0, v0, v1 +; VI-GISEL-NEXT: v_cndmask_b32_e64 v1, 0, 1, vcc +; VI-GISEL-NEXT: v_lshlrev_b32_e32 v1, 5, v1 +; VI-GISEL-NEXT: v_ldexp_f32 v0, v0, v1 ; VI-GISEL-NEXT: v_log_f32_e32 v0, v0 ; VI-GISEL-NEXT: v_and_b32_e32 v1, 0xfffff000, v0 ; VI-GISEL-NEXT: v_sub_f32_e32 v0, v0, v1 -; VI-GISEL-NEXT: v_mul_f32_e32 v2, 0x3805fdf4, v0 -; VI-GISEL-NEXT: v_mul_f32_e32 v3, 0x3805fdf4, v1 -; VI-GISEL-NEXT: v_add_f32_e32 v2, v3, v2 +; VI-GISEL-NEXT: v_mul_f32_e32 v2, 0x3805fdf4, v1 +; VI-GISEL-NEXT: v_mul_f32_e32 v3, 0x3805fdf4, v0 +; VI-GISEL-NEXT: v_add_f32_e32 v2, v2, v3 ; VI-GISEL-NEXT: v_mul_f32_e32 v0, 0x3f317000, v0 ; VI-GISEL-NEXT: v_add_f32_e32 v0, v0, v2 ; VI-GISEL-NEXT: v_mul_f32_e32 v1, 0x3f317000, v1 @@ -5010,10 +5060,10 @@ define float @v_log_f32_nnan_ninf(float %in) { ; GFX900-GISEL: ; %bb.0: ; GFX900-GISEL-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) ; GFX900-GISEL-NEXT: v_mov_b32_e32 v1, 0x800000 -; GFX900-GISEL-NEXT: v_mov_b32_e32 v2, 0x4f800000 ; GFX900-GISEL-NEXT: v_cmp_lt_f32_e32 vcc, v0, v1 -; GFX900-GISEL-NEXT: v_cndmask_b32_e32 v1, 1.0, v2, vcc -; GFX900-GISEL-NEXT: v_mul_f32_e32 v0, v0, v1 +; GFX900-GISEL-NEXT: v_cndmask_b32_e64 v1, 0, 1, vcc +; GFX900-GISEL-NEXT: v_lshlrev_b32_e32 v1, 
5, v1 +; GFX900-GISEL-NEXT: v_ldexp_f32 v0, v0, v1 ; GFX900-GISEL-NEXT: v_log_f32_e32 v0, v0 ; GFX900-GISEL-NEXT: v_mov_b32_e32 v1, 0x3f317217 ; GFX900-GISEL-NEXT: v_mov_b32_e32 v3, 0x3377d1cf @@ -5051,18 +5101,20 @@ define float @v_log_f32_nnan_ninf(float %in) { ; GFX1100-GISEL: ; %bb.0: ; GFX1100-GISEL-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) ; GFX1100-GISEL-NEXT: v_cmp_gt_f32_e32 vcc_lo, 0x800000, v0 -; GFX1100-GISEL-NEXT: v_cndmask_b32_e64 v1, 1.0, 0x4f800000, vcc_lo +; GFX1100-GISEL-NEXT: v_cndmask_b32_e64 v1, 0, 1, vcc_lo ; GFX1100-GISEL-NEXT: s_delay_alu instid0(VALU_DEP_1) | instskip(NEXT) | instid1(VALU_DEP_1) -; GFX1100-GISEL-NEXT: v_mul_f32_e32 v0, v0, v1 +; GFX1100-GISEL-NEXT: v_lshlrev_b32_e32 v1, 5, v1 +; GFX1100-GISEL-NEXT: v_ldexp_f32 v0, v0, v1 +; GFX1100-GISEL-NEXT: s_delay_alu instid0(VALU_DEP_1) | instskip(SKIP_2) | instid1(VALU_DEP_1) ; GFX1100-GISEL-NEXT: v_log_f32_e32 v0, v0 ; GFX1100-GISEL-NEXT: s_waitcnt_depctr 0xfff ; GFX1100-GISEL-NEXT: v_mul_f32_e32 v1, 0x3f317217, v0 -; GFX1100-GISEL-NEXT: s_delay_alu instid0(VALU_DEP_1) | instskip(NEXT) | instid1(VALU_DEP_1) ; GFX1100-GISEL-NEXT: v_fma_f32 v2, 0x3f317217, v0, -v1 +; GFX1100-GISEL-NEXT: s_delay_alu instid0(VALU_DEP_1) | instskip(NEXT) | instid1(VALU_DEP_1) ; GFX1100-GISEL-NEXT: v_fmac_f32_e32 v2, 0x3377d1cf, v0 -; GFX1100-GISEL-NEXT: s_delay_alu instid0(VALU_DEP_1) | instskip(SKIP_1) | instid1(VALU_DEP_1) ; GFX1100-GISEL-NEXT: v_add_f32_e32 v0, v1, v2 ; GFX1100-GISEL-NEXT: v_cndmask_b32_e64 v1, 0, 0x41b17218, vcc_lo +; GFX1100-GISEL-NEXT: s_delay_alu instid0(VALU_DEP_1) ; GFX1100-GISEL-NEXT: v_sub_f32_e32 v0, v0, v1 ; GFX1100-GISEL-NEXT: s_setpc_b64 s[30:31] ; @@ -5207,10 +5259,10 @@ define float @v_log_f32_nnan_ninf_dynamic(float %in) #1 { ; SI-GISEL: ; %bb.0: ; SI-GISEL-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) ; SI-GISEL-NEXT: v_mov_b32_e32 v1, 0x800000 -; SI-GISEL-NEXT: v_mov_b32_e32 v2, 0x4f800000 ; SI-GISEL-NEXT: v_cmp_lt_f32_e32 vcc, v0, v1 -; SI-GISEL-NEXT: 
v_cndmask_b32_e32 v1, 1.0, v2, vcc -; SI-GISEL-NEXT: v_mul_f32_e32 v0, v0, v1 +; SI-GISEL-NEXT: v_cndmask_b32_e64 v1, 0, 1, vcc +; SI-GISEL-NEXT: v_lshlrev_b32_e32 v1, 5, v1 +; SI-GISEL-NEXT: v_ldexp_f32_e32 v0, v0, v1 ; SI-GISEL-NEXT: v_log_f32_e32 v0, v0 ; SI-GISEL-NEXT: v_mov_b32_e32 v1, 0x3f317217 ; SI-GISEL-NEXT: v_mov_b32_e32 v3, 0x3377d1cf @@ -5250,16 +5302,16 @@ define float @v_log_f32_nnan_ninf_dynamic(float %in) #1 { ; VI-GISEL: ; %bb.0: ; VI-GISEL-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) ; VI-GISEL-NEXT: v_mov_b32_e32 v1, 0x800000 -; VI-GISEL-NEXT: v_mov_b32_e32 v2, 0x4f800000 ; VI-GISEL-NEXT: v_cmp_lt_f32_e32 vcc, v0, v1 -; VI-GISEL-NEXT: v_cndmask_b32_e32 v1, 1.0, v2, vcc -; VI-GISEL-NEXT: v_mul_f32_e32 v0, v0, v1 +; VI-GISEL-NEXT: v_cndmask_b32_e64 v1, 0, 1, vcc +; VI-GISEL-NEXT: v_lshlrev_b32_e32 v1, 5, v1 +; VI-GISEL-NEXT: v_ldexp_f32 v0, v0, v1 ; VI-GISEL-NEXT: v_log_f32_e32 v0, v0 ; VI-GISEL-NEXT: v_and_b32_e32 v1, 0xfffff000, v0 ; VI-GISEL-NEXT: v_sub_f32_e32 v0, v0, v1 -; VI-GISEL-NEXT: v_mul_f32_e32 v2, 0x3805fdf4, v0 -; VI-GISEL-NEXT: v_mul_f32_e32 v3, 0x3805fdf4, v1 -; VI-GISEL-NEXT: v_add_f32_e32 v2, v3, v2 +; VI-GISEL-NEXT: v_mul_f32_e32 v2, 0x3805fdf4, v1 +; VI-GISEL-NEXT: v_mul_f32_e32 v3, 0x3805fdf4, v0 +; VI-GISEL-NEXT: v_add_f32_e32 v2, v2, v3 ; VI-GISEL-NEXT: v_mul_f32_e32 v0, 0x3f317000, v0 ; VI-GISEL-NEXT: v_add_f32_e32 v0, v0, v2 ; VI-GISEL-NEXT: v_mul_f32_e32 v1, 0x3f317000, v1 @@ -5293,10 +5345,10 @@ define float @v_log_f32_nnan_ninf_dynamic(float %in) #1 { ; GFX900-GISEL: ; %bb.0: ; GFX900-GISEL-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) ; GFX900-GISEL-NEXT: v_mov_b32_e32 v1, 0x800000 -; GFX900-GISEL-NEXT: v_mov_b32_e32 v2, 0x4f800000 ; GFX900-GISEL-NEXT: v_cmp_lt_f32_e32 vcc, v0, v1 -; GFX900-GISEL-NEXT: v_cndmask_b32_e32 v1, 1.0, v2, vcc -; GFX900-GISEL-NEXT: v_mul_f32_e32 v0, v0, v1 +; GFX900-GISEL-NEXT: v_cndmask_b32_e64 v1, 0, 1, vcc +; GFX900-GISEL-NEXT: v_lshlrev_b32_e32 v1, 5, v1 +; GFX900-GISEL-NEXT: v_ldexp_f32 
v0, v0, v1 ; GFX900-GISEL-NEXT: v_log_f32_e32 v0, v0 ; GFX900-GISEL-NEXT: v_mov_b32_e32 v1, 0x3f317217 ; GFX900-GISEL-NEXT: v_mov_b32_e32 v3, 0x3377d1cf @@ -5334,18 +5386,20 @@ define float @v_log_f32_nnan_ninf_dynamic(float %in) #1 { ; GFX1100-GISEL: ; %bb.0: ; GFX1100-GISEL-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) ; GFX1100-GISEL-NEXT: v_cmp_gt_f32_e32 vcc_lo, 0x800000, v0 -; GFX1100-GISEL-NEXT: v_cndmask_b32_e64 v1, 1.0, 0x4f800000, vcc_lo +; GFX1100-GISEL-NEXT: v_cndmask_b32_e64 v1, 0, 1, vcc_lo ; GFX1100-GISEL-NEXT: s_delay_alu instid0(VALU_DEP_1) | instskip(NEXT) | instid1(VALU_DEP_1) -; GFX1100-GISEL-NEXT: v_mul_f32_e32 v0, v0, v1 +; GFX1100-GISEL-NEXT: v_lshlrev_b32_e32 v1, 5, v1 +; GFX1100-GISEL-NEXT: v_ldexp_f32 v0, v0, v1 +; GFX1100-GISEL-NEXT: s_delay_alu instid0(VALU_DEP_1) | instskip(SKIP_2) | instid1(VALU_DEP_1) ; GFX1100-GISEL-NEXT: v_log_f32_e32 v0, v0 ; GFX1100-GISEL-NEXT: s_waitcnt_depctr 0xfff ; GFX1100-GISEL-NEXT: v_mul_f32_e32 v1, 0x3f317217, v0 -; GFX1100-GISEL-NEXT: s_delay_alu instid0(VALU_DEP_1) | instskip(NEXT) | instid1(VALU_DEP_1) ; GFX1100-GISEL-NEXT: v_fma_f32 v2, 0x3f317217, v0, -v1 +; GFX1100-GISEL-NEXT: s_delay_alu instid0(VALU_DEP_1) | instskip(NEXT) | instid1(VALU_DEP_1) ; GFX1100-GISEL-NEXT: v_fmac_f32_e32 v2, 0x3377d1cf, v0 -; GFX1100-GISEL-NEXT: s_delay_alu instid0(VALU_DEP_1) | instskip(SKIP_1) | instid1(VALU_DEP_1) ; GFX1100-GISEL-NEXT: v_add_f32_e32 v0, v1, v2 ; GFX1100-GISEL-NEXT: v_cndmask_b32_e64 v1, 0, 0x41b17218, vcc_lo +; GFX1100-GISEL-NEXT: s_delay_alu instid0(VALU_DEP_1) ; GFX1100-GISEL-NEXT: v_sub_f32_e32 v0, v0, v1 ; GFX1100-GISEL-NEXT: s_setpc_b64 s[30:31] ; @@ -5419,10 +5473,10 @@ define float @v_log_f32_dynamic_mode(float %in) #1 { ; SI-GISEL: ; %bb.0: ; SI-GISEL-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) ; SI-GISEL-NEXT: v_mov_b32_e32 v1, 0x800000 -; SI-GISEL-NEXT: v_mov_b32_e32 v2, 0x4f800000 ; SI-GISEL-NEXT: v_cmp_lt_f32_e32 vcc, v0, v1 -; SI-GISEL-NEXT: v_cndmask_b32_e32 v1, 1.0, v2, vcc -; 
SI-GISEL-NEXT: v_mul_f32_e32 v0, v0, v1 +; SI-GISEL-NEXT: v_cndmask_b32_e64 v1, 0, 1, vcc +; SI-GISEL-NEXT: v_lshlrev_b32_e32 v1, 5, v1 +; SI-GISEL-NEXT: v_ldexp_f32_e32 v0, v0, v1 ; SI-GISEL-NEXT: v_log_f32_e32 v0, v0 ; SI-GISEL-NEXT: v_mov_b32_e32 v1, 0x3f317217 ; SI-GISEL-NEXT: v_mov_b32_e32 v3, 0x3377d1cf @@ -5468,16 +5522,16 @@ define float @v_log_f32_dynamic_mode(float %in) #1 { ; VI-GISEL: ; %bb.0: ; VI-GISEL-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) ; VI-GISEL-NEXT: v_mov_b32_e32 v1, 0x800000 -; VI-GISEL-NEXT: v_mov_b32_e32 v2, 0x4f800000 ; VI-GISEL-NEXT: v_cmp_lt_f32_e32 vcc, v0, v1 -; VI-GISEL-NEXT: v_cndmask_b32_e32 v1, 1.0, v2, vcc -; VI-GISEL-NEXT: v_mul_f32_e32 v0, v0, v1 +; VI-GISEL-NEXT: v_cndmask_b32_e64 v1, 0, 1, vcc +; VI-GISEL-NEXT: v_lshlrev_b32_e32 v1, 5, v1 +; VI-GISEL-NEXT: v_ldexp_f32 v0, v0, v1 ; VI-GISEL-NEXT: v_log_f32_e32 v0, v0 ; VI-GISEL-NEXT: v_and_b32_e32 v1, 0xfffff000, v0 ; VI-GISEL-NEXT: v_sub_f32_e32 v2, v0, v1 -; VI-GISEL-NEXT: v_mul_f32_e32 v3, 0x3805fdf4, v2 -; VI-GISEL-NEXT: v_mul_f32_e32 v4, 0x3805fdf4, v1 -; VI-GISEL-NEXT: v_add_f32_e32 v3, v4, v3 +; VI-GISEL-NEXT: v_mul_f32_e32 v3, 0x3805fdf4, v1 +; VI-GISEL-NEXT: v_mul_f32_e32 v4, 0x3805fdf4, v2 +; VI-GISEL-NEXT: v_add_f32_e32 v3, v3, v4 ; VI-GISEL-NEXT: v_mul_f32_e32 v2, 0x3f317000, v2 ; VI-GISEL-NEXT: v_add_f32_e32 v2, v2, v3 ; VI-GISEL-NEXT: v_mul_f32_e32 v1, 0x3f317000, v1 @@ -5517,10 +5571,10 @@ define float @v_log_f32_dynamic_mode(float %in) #1 { ; GFX900-GISEL: ; %bb.0: ; GFX900-GISEL-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) ; GFX900-GISEL-NEXT: v_mov_b32_e32 v1, 0x800000 -; GFX900-GISEL-NEXT: v_mov_b32_e32 v2, 0x4f800000 ; GFX900-GISEL-NEXT: v_cmp_lt_f32_e32 vcc, v0, v1 -; GFX900-GISEL-NEXT: v_cndmask_b32_e32 v1, 1.0, v2, vcc -; GFX900-GISEL-NEXT: v_mul_f32_e32 v0, v0, v1 +; GFX900-GISEL-NEXT: v_cndmask_b32_e64 v1, 0, 1, vcc +; GFX900-GISEL-NEXT: v_lshlrev_b32_e32 v1, 5, v1 +; GFX900-GISEL-NEXT: v_ldexp_f32 v0, v0, v1 ; GFX900-GISEL-NEXT: v_log_f32_e32 
v0, v0 ; GFX900-GISEL-NEXT: v_mov_b32_e32 v1, 0x3f317217 ; GFX900-GISEL-NEXT: v_mov_b32_e32 v3, 0x3377d1cf @@ -5563,21 +5617,22 @@ define float @v_log_f32_dynamic_mode(float %in) #1 { ; GFX1100-GISEL: ; %bb.0: ; GFX1100-GISEL-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) ; GFX1100-GISEL-NEXT: v_cmp_gt_f32_e32 vcc_lo, 0x800000, v0 -; GFX1100-GISEL-NEXT: v_cndmask_b32_e64 v1, 1.0, 0x4f800000, vcc_lo +; GFX1100-GISEL-NEXT: v_cndmask_b32_e64 v1, 0, 1, vcc_lo ; GFX1100-GISEL-NEXT: s_delay_alu instid0(VALU_DEP_1) | instskip(NEXT) | instid1(VALU_DEP_1) -; GFX1100-GISEL-NEXT: v_mul_f32_e32 v0, v0, v1 +; GFX1100-GISEL-NEXT: v_lshlrev_b32_e32 v1, 5, v1 +; GFX1100-GISEL-NEXT: v_ldexp_f32 v0, v0, v1 +; GFX1100-GISEL-NEXT: s_delay_alu instid0(VALU_DEP_1) | instskip(SKIP_3) | instid1(VALU_DEP_2) ; GFX1100-GISEL-NEXT: v_log_f32_e32 v0, v0 ; GFX1100-GISEL-NEXT: s_waitcnt_depctr 0xfff ; GFX1100-GISEL-NEXT: v_mul_f32_e32 v1, 0x3f317217, v0 ; GFX1100-GISEL-NEXT: v_cmp_gt_f32_e64 s0, 0x7f800000, |v0| -; GFX1100-GISEL-NEXT: s_delay_alu instid0(VALU_DEP_2) | instskip(NEXT) | instid1(VALU_DEP_1) ; GFX1100-GISEL-NEXT: v_fma_f32 v2, 0x3f317217, v0, -v1 -; GFX1100-GISEL-NEXT: v_fmac_f32_e32 v2, 0x3377d1cf, v0 ; GFX1100-GISEL-NEXT: s_delay_alu instid0(VALU_DEP_1) | instskip(NEXT) | instid1(VALU_DEP_1) +; GFX1100-GISEL-NEXT: v_fmac_f32_e32 v2, 0x3377d1cf, v0 ; GFX1100-GISEL-NEXT: v_add_f32_e32 v1, v1, v2 +; GFX1100-GISEL-NEXT: s_delay_alu instid0(VALU_DEP_1) | instskip(SKIP_1) | instid1(VALU_DEP_1) ; GFX1100-GISEL-NEXT: v_cndmask_b32_e64 v0, v0, v1, s0 ; GFX1100-GISEL-NEXT: v_cndmask_b32_e64 v1, 0, 0x41b17218, vcc_lo -; GFX1100-GISEL-NEXT: s_delay_alu instid0(VALU_DEP_1) ; GFX1100-GISEL-NEXT: v_sub_f32_e32 v0, v0, v1 ; GFX1100-GISEL-NEXT: s_setpc_b64 s[30:31] ; diff --git a/llvm/test/CodeGen/AMDGPU/llvm.log10.ll b/llvm/test/CodeGen/AMDGPU/llvm.log10.ll index fd50d1b60fbd1..d09df75837339 100644 --- a/llvm/test/CodeGen/AMDGPU/llvm.log10.ll +++ b/llvm/test/CodeGen/AMDGPU/llvm.log10.ll @@ -45,16 
+45,17 @@ define amdgpu_kernel void @s_log10_f32(ptr addrspace(1) %out, float %in) { ; SI-GISEL-NEXT: s_load_dword s0, s[4:5], 0xb ; SI-GISEL-NEXT: s_load_dwordx2 s[4:5], s[4:5], 0x9 ; SI-GISEL-NEXT: v_mov_b32_e32 v0, 0x800000 -; SI-GISEL-NEXT: v_mov_b32_e32 v1, 0x4f800000 +; SI-GISEL-NEXT: v_mov_b32_e32 v1, 0x3e9a209a ; SI-GISEL-NEXT: v_mov_b32_e32 v2, 0x3284fbcf ; SI-GISEL-NEXT: s_waitcnt lgkmcnt(0) ; SI-GISEL-NEXT: v_cmp_lt_f32_e32 vcc, s0, v0 -; SI-GISEL-NEXT: v_cndmask_b32_e32 v0, 1.0, v1, vcc -; SI-GISEL-NEXT: v_mul_f32_e32 v0, s0, v0 +; SI-GISEL-NEXT: v_cndmask_b32_e64 v0, 0, 1, vcc +; SI-GISEL-NEXT: v_lshlrev_b32_e32 v0, 5, v0 +; SI-GISEL-NEXT: v_ldexp_f32_e32 v0, s0, v0 ; SI-GISEL-NEXT: v_log_f32_e32 v0, v0 -; SI-GISEL-NEXT: v_mov_b32_e32 v1, 0x3e9a209a ; SI-GISEL-NEXT: v_mov_b32_e32 v3, 0x7f800000 ; SI-GISEL-NEXT: s_mov_b32 s6, -1 +; SI-GISEL-NEXT: s_mov_b32 s7, 0xf000 ; SI-GISEL-NEXT: v_mul_f32_e32 v4, 0x3e9a209a, v0 ; SI-GISEL-NEXT: v_fma_f32 v1, v0, v1, -v4 ; SI-GISEL-NEXT: v_fma_f32 v1, v0, v2, v1 @@ -64,7 +65,6 @@ define amdgpu_kernel void @s_log10_f32(ptr addrspace(1) %out, float %in) { ; SI-GISEL-NEXT: v_mov_b32_e32 v1, 0x411a209b ; SI-GISEL-NEXT: v_cndmask_b32_e32 v1, 0, v1, vcc ; SI-GISEL-NEXT: v_sub_f32_e32 v0, v0, v1 -; SI-GISEL-NEXT: s_mov_b32 s7, 0xf000 ; SI-GISEL-NEXT: buffer_store_dword v0, off, s[4:7], 0 ; SI-GISEL-NEXT: s_endpgm ; @@ -104,25 +104,25 @@ define amdgpu_kernel void @s_log10_f32(ptr addrspace(1) %out, float %in) { ; VI-GISEL-NEXT: s_load_dword s0, s[4:5], 0x2c ; VI-GISEL-NEXT: s_load_dwordx2 s[2:3], s[4:5], 0x24 ; VI-GISEL-NEXT: v_mov_b32_e32 v0, 0x800000 -; VI-GISEL-NEXT: v_mov_b32_e32 v1, 0x4f800000 +; VI-GISEL-NEXT: v_mov_b32_e32 v1, 0x7f800000 ; VI-GISEL-NEXT: s_waitcnt lgkmcnt(0) ; VI-GISEL-NEXT: v_cmp_lt_f32_e32 vcc, s0, v0 -; VI-GISEL-NEXT: v_cndmask_b32_e32 v0, 1.0, v1, vcc -; VI-GISEL-NEXT: v_mul_f32_e32 v0, s0, v0 +; VI-GISEL-NEXT: v_cndmask_b32_e64 v0, 0, 1, vcc +; VI-GISEL-NEXT: v_lshlrev_b32_e32 v0, 5, v0 +; 
VI-GISEL-NEXT: v_ldexp_f32 v0, s0, v0 ; VI-GISEL-NEXT: v_log_f32_e32 v0, v0 -; VI-GISEL-NEXT: v_and_b32_e32 v1, 0xfffff000, v0 -; VI-GISEL-NEXT: v_sub_f32_e32 v2, v0, v1 -; VI-GISEL-NEXT: v_mul_f32_e32 v3, 0x369a84fb, v1 +; VI-GISEL-NEXT: v_and_b32_e32 v2, 0xfffff000, v0 +; VI-GISEL-NEXT: v_sub_f32_e32 v3, v0, v2 ; VI-GISEL-NEXT: v_mul_f32_e32 v4, 0x369a84fb, v2 +; VI-GISEL-NEXT: v_mul_f32_e32 v5, 0x369a84fb, v3 +; VI-GISEL-NEXT: v_mul_f32_e32 v3, 0x3e9a2000, v3 +; VI-GISEL-NEXT: v_add_f32_e32 v4, v4, v5 ; VI-GISEL-NEXT: v_mul_f32_e32 v2, 0x3e9a2000, v2 ; VI-GISEL-NEXT: v_add_f32_e32 v3, v3, v4 -; VI-GISEL-NEXT: v_mul_f32_e32 v1, 0x3e9a2000, v1 ; VI-GISEL-NEXT: v_add_f32_e32 v2, v2, v3 -; VI-GISEL-NEXT: v_add_f32_e32 v1, v1, v2 -; VI-GISEL-NEXT: v_mov_b32_e32 v2, 0x7f800000 -; VI-GISEL-NEXT: v_cmp_lt_f32_e64 s[0:1], |v0|, v2 -; VI-GISEL-NEXT: v_cndmask_b32_e64 v0, v0, v1, s[0:1] +; VI-GISEL-NEXT: v_cmp_lt_f32_e64 s[0:1], |v0|, v1 ; VI-GISEL-NEXT: v_mov_b32_e32 v1, 0x411a209b +; VI-GISEL-NEXT: v_cndmask_b32_e64 v0, v0, v2, s[0:1] ; VI-GISEL-NEXT: v_cndmask_b32_e32 v1, 0, v1, vcc ; VI-GISEL-NEXT: v_sub_f32_e32 v2, v0, v1 ; VI-GISEL-NEXT: v_mov_b32_e32 v0, s2 @@ -162,25 +162,25 @@ define amdgpu_kernel void @s_log10_f32(ptr addrspace(1) %out, float %in) { ; GFX900-GISEL-NEXT: s_load_dword s0, s[4:5], 0x2c ; GFX900-GISEL-NEXT: s_load_dwordx2 s[2:3], s[4:5], 0x24 ; GFX900-GISEL-NEXT: v_mov_b32_e32 v0, 0x800000 -; GFX900-GISEL-NEXT: v_mov_b32_e32 v1, 0x4f800000 -; GFX900-GISEL-NEXT: v_mov_b32_e32 v2, 0x3284fbcf +; GFX900-GISEL-NEXT: v_mov_b32_e32 v2, 0x3e9a209a +; GFX900-GISEL-NEXT: v_mov_b32_e32 v3, 0x3284fbcf ; GFX900-GISEL-NEXT: s_waitcnt lgkmcnt(0) ; GFX900-GISEL-NEXT: v_cmp_lt_f32_e32 vcc, s0, v0 -; GFX900-GISEL-NEXT: v_cndmask_b32_e32 v0, 1.0, v1, vcc -; GFX900-GISEL-NEXT: v_mul_f32_e32 v0, s0, v0 +; GFX900-GISEL-NEXT: v_cndmask_b32_e64 v0, 0, 1, vcc +; GFX900-GISEL-NEXT: v_lshlrev_b32_e32 v0, 5, v0 +; GFX900-GISEL-NEXT: v_ldexp_f32 v0, s0, v0 ; GFX900-GISEL-NEXT: 
v_log_f32_e32 v0, v0 -; GFX900-GISEL-NEXT: v_mov_b32_e32 v1, 0x3e9a209a -; GFX900-GISEL-NEXT: v_mov_b32_e32 v3, 0x7f800000 -; GFX900-GISEL-NEXT: v_mul_f32_e32 v4, 0x3e9a209a, v0 -; GFX900-GISEL-NEXT: v_fma_f32 v1, v0, v1, -v4 -; GFX900-GISEL-NEXT: v_fma_f32 v1, v0, v2, v1 -; GFX900-GISEL-NEXT: v_add_f32_e32 v1, v4, v1 -; GFX900-GISEL-NEXT: v_cmp_lt_f32_e64 s[0:1], |v0|, v3 -; GFX900-GISEL-NEXT: v_cndmask_b32_e64 v0, v0, v1, s[0:1] -; GFX900-GISEL-NEXT: v_mov_b32_e32 v1, 0x411a209b -; GFX900-GISEL-NEXT: v_cndmask_b32_e32 v1, 0, v1, vcc -; GFX900-GISEL-NEXT: v_sub_f32_e32 v0, v0, v1 +; GFX900-GISEL-NEXT: v_mov_b32_e32 v4, 0x7f800000 ; GFX900-GISEL-NEXT: v_mov_b32_e32 v1, 0 +; GFX900-GISEL-NEXT: v_mul_f32_e32 v5, 0x3e9a209a, v0 +; GFX900-GISEL-NEXT: v_fma_f32 v2, v0, v2, -v5 +; GFX900-GISEL-NEXT: v_fma_f32 v2, v0, v3, v2 +; GFX900-GISEL-NEXT: v_add_f32_e32 v2, v5, v2 +; GFX900-GISEL-NEXT: v_cmp_lt_f32_e64 s[0:1], |v0|, v4 +; GFX900-GISEL-NEXT: v_cndmask_b32_e64 v0, v0, v2, s[0:1] +; GFX900-GISEL-NEXT: v_mov_b32_e32 v2, 0x411a209b +; GFX900-GISEL-NEXT: v_cndmask_b32_e32 v2, 0, v2, vcc +; GFX900-GISEL-NEXT: v_sub_f32_e32 v0, v0, v2 ; GFX900-GISEL-NEXT: global_store_dword v1, v0, s[2:3] ; GFX900-GISEL-NEXT: s_endpgm ; @@ -218,24 +218,26 @@ define amdgpu_kernel void @s_log10_f32(ptr addrspace(1) %out, float %in) { ; GFX1100-GISEL-NEXT: s_waitcnt lgkmcnt(0) ; GFX1100-GISEL-NEXT: v_cmp_gt_f32_e64 s2, 0x800000, s0 ; GFX1100-GISEL-NEXT: s_delay_alu instid0(VALU_DEP_1) | instskip(NEXT) | instid1(VALU_DEP_1) -; GFX1100-GISEL-NEXT: v_cndmask_b32_e64 v0, 1.0, 0x4f800000, s2 -; GFX1100-GISEL-NEXT: v_mul_f32_e32 v0, s0, v0 +; GFX1100-GISEL-NEXT: v_cndmask_b32_e64 v0, 0, 1, s2 +; GFX1100-GISEL-NEXT: v_lshlrev_b32_e32 v0, 5, v0 +; GFX1100-GISEL-NEXT: s_delay_alu instid0(VALU_DEP_1) | instskip(SKIP_1) | instid1(VALU_DEP_1) +; GFX1100-GISEL-NEXT: v_ldexp_f32 v0, s0, v0 ; GFX1100-GISEL-NEXT: s_load_b64 s[0:1], s[4:5], 0x24 -; GFX1100-GISEL-NEXT: s_delay_alu instid0(VALU_DEP_1) | 
instskip(SKIP_3) | instid1(VALU_DEP_2) ; GFX1100-GISEL-NEXT: v_log_f32_e32 v0, v0 ; GFX1100-GISEL-NEXT: s_waitcnt_depctr 0xfff ; GFX1100-GISEL-NEXT: v_mul_f32_e32 v1, 0x3e9a209a, v0 ; GFX1100-GISEL-NEXT: v_cmp_gt_f32_e64 vcc_lo, 0x7f800000, |v0| +; GFX1100-GISEL-NEXT: s_delay_alu instid0(VALU_DEP_2) | instskip(NEXT) | instid1(VALU_DEP_1) ; GFX1100-GISEL-NEXT: v_fma_f32 v2, 0x3e9a209a, v0, -v1 -; GFX1100-GISEL-NEXT: s_delay_alu instid0(VALU_DEP_1) | instskip(NEXT) | instid1(VALU_DEP_1) ; GFX1100-GISEL-NEXT: v_fmac_f32_e32 v2, 0x3284fbcf, v0 -; GFX1100-GISEL-NEXT: v_add_f32_e32 v1, v1, v2 -; GFX1100-GISEL-NEXT: v_cndmask_b32_e64 v2, 0, 0x411a209b, s2 -; GFX1100-GISEL-NEXT: s_delay_alu instid0(VALU_DEP_2) | instskip(NEXT) | instid1(VALU_DEP_1) -; GFX1100-GISEL-NEXT: v_dual_cndmask_b32 v0, v0, v1 :: v_dual_mov_b32 v1, 0 -; GFX1100-GISEL-NEXT: v_sub_f32_e32 v0, v0, v2 +; GFX1100-GISEL-NEXT: s_delay_alu instid0(VALU_DEP_1) | instskip(NEXT) | instid1(VALU_DEP_1) +; GFX1100-GISEL-NEXT: v_dual_add_f32 v1, v1, v2 :: v_dual_mov_b32 v2, 0 +; GFX1100-GISEL-NEXT: v_cndmask_b32_e32 v0, v0, v1, vcc_lo +; GFX1100-GISEL-NEXT: v_cndmask_b32_e64 v1, 0, 0x411a209b, s2 +; GFX1100-GISEL-NEXT: s_delay_alu instid0(VALU_DEP_1) +; GFX1100-GISEL-NEXT: v_sub_f32_e32 v0, v0, v1 ; GFX1100-GISEL-NEXT: s_waitcnt lgkmcnt(0) -; GFX1100-GISEL-NEXT: global_store_b32 v1, v0, s[0:1] +; GFX1100-GISEL-NEXT: global_store_b32 v2, v0, s[0:1] ; GFX1100-GISEL-NEXT: s_endpgm ; ; R600-LABEL: s_log10_f32: @@ -358,35 +360,36 @@ define amdgpu_kernel void @s_log10_v2f32(ptr addrspace(1) %out, <2 x float> %in) ; SI-GISEL: ; %bb.0: ; SI-GISEL-NEXT: s_load_dwordx4 s[4:7], s[4:5], 0x9 ; SI-GISEL-NEXT: v_mov_b32_e32 v0, 0x800000 -; SI-GISEL-NEXT: v_mov_b32_e32 v1, 0x4f800000 -; SI-GISEL-NEXT: v_mov_b32_e32 v3, 0x3e9a209a -; SI-GISEL-NEXT: v_mov_b32_e32 v4, 0x3284fbcf +; SI-GISEL-NEXT: v_mov_b32_e32 v2, 0x3e9a209a +; SI-GISEL-NEXT: v_mov_b32_e32 v3, 0x3284fbcf +; SI-GISEL-NEXT: v_mov_b32_e32 v4, 0x7f800000 ; 
SI-GISEL-NEXT: s_waitcnt lgkmcnt(0) ; SI-GISEL-NEXT: v_cmp_lt_f32_e32 vcc, s6, v0 -; SI-GISEL-NEXT: v_cndmask_b32_e32 v2, 1.0, v1, vcc -; SI-GISEL-NEXT: v_mul_f32_e32 v2, s6, v2 -; SI-GISEL-NEXT: v_log_f32_e32 v2, v2 -; SI-GISEL-NEXT: v_mov_b32_e32 v5, 0x7f800000 +; SI-GISEL-NEXT: v_cndmask_b32_e64 v1, 0, 1, vcc +; SI-GISEL-NEXT: v_lshlrev_b32_e32 v1, 5, v1 +; SI-GISEL-NEXT: v_ldexp_f32_e32 v1, s6, v1 +; SI-GISEL-NEXT: v_log_f32_e32 v1, v1 ; SI-GISEL-NEXT: s_mov_b32 s6, -1 -; SI-GISEL-NEXT: v_mul_f32_e32 v6, 0x3e9a209a, v2 -; SI-GISEL-NEXT: v_fma_f32 v7, v2, v3, -v6 -; SI-GISEL-NEXT: v_fma_f32 v7, v2, v4, v7 -; SI-GISEL-NEXT: v_add_f32_e32 v6, v6, v7 -; SI-GISEL-NEXT: v_cmp_lt_f32_e64 s[0:1], |v2|, v5 -; SI-GISEL-NEXT: v_cndmask_b32_e64 v2, v2, v6, s[0:1] +; SI-GISEL-NEXT: v_mul_f32_e32 v5, 0x3e9a209a, v1 +; SI-GISEL-NEXT: v_fma_f32 v6, v1, v2, -v5 +; SI-GISEL-NEXT: v_fma_f32 v6, v1, v3, v6 +; SI-GISEL-NEXT: v_add_f32_e32 v5, v5, v6 +; SI-GISEL-NEXT: v_cmp_lt_f32_e64 s[0:1], |v1|, v4 +; SI-GISEL-NEXT: v_cndmask_b32_e64 v1, v1, v5, s[0:1] ; SI-GISEL-NEXT: v_cmp_lt_f32_e64 s[0:1], s7, v0 -; SI-GISEL-NEXT: v_cndmask_b32_e64 v0, 1.0, v1, s[0:1] -; SI-GISEL-NEXT: v_mul_f32_e32 v0, s7, v0 -; SI-GISEL-NEXT: v_log_f32_e32 v1, v0 +; SI-GISEL-NEXT: v_cndmask_b32_e64 v0, 0, 1, s[0:1] +; SI-GISEL-NEXT: v_lshlrev_b32_e32 v0, 5, v0 +; SI-GISEL-NEXT: v_ldexp_f32_e32 v0, s7, v0 +; SI-GISEL-NEXT: v_log_f32_e32 v5, v0 ; SI-GISEL-NEXT: v_mov_b32_e32 v6, 0x411a209b ; SI-GISEL-NEXT: v_cndmask_b32_e32 v0, 0, v6, vcc -; SI-GISEL-NEXT: v_sub_f32_e32 v0, v2, v0 -; SI-GISEL-NEXT: v_mul_f32_e32 v2, 0x3e9a209a, v1 -; SI-GISEL-NEXT: v_fma_f32 v3, v1, v3, -v2 -; SI-GISEL-NEXT: v_fma_f32 v3, v1, v4, v3 -; SI-GISEL-NEXT: v_add_f32_e32 v2, v2, v3 -; SI-GISEL-NEXT: v_cmp_lt_f32_e64 vcc, |v1|, v5 -; SI-GISEL-NEXT: v_cndmask_b32_e32 v1, v1, v2, vcc +; SI-GISEL-NEXT: v_sub_f32_e32 v0, v1, v0 +; SI-GISEL-NEXT: v_mul_f32_e32 v1, 0x3e9a209a, v5 +; SI-GISEL-NEXT: v_fma_f32 v2, v5, v2, -v1 +; 
SI-GISEL-NEXT: v_fma_f32 v2, v5, v3, v2 +; SI-GISEL-NEXT: v_add_f32_e32 v1, v1, v2 +; SI-GISEL-NEXT: v_cmp_lt_f32_e64 vcc, |v5|, v4 +; SI-GISEL-NEXT: v_cndmask_b32_e32 v1, v5, v1, vcc ; SI-GISEL-NEXT: v_cndmask_b32_e64 v2, 0, v6, s[0:1] ; SI-GISEL-NEXT: v_sub_f32_e32 v1, v1, v2 ; SI-GISEL-NEXT: s_mov_b32 s7, 0xf000 @@ -445,42 +448,43 @@ define amdgpu_kernel void @s_log10_v2f32(ptr addrspace(1) %out, <2 x float> %in) ; VI-GISEL: ; %bb.0: ; VI-GISEL-NEXT: s_load_dwordx4 s[4:7], s[4:5], 0x24 ; VI-GISEL-NEXT: v_mov_b32_e32 v0, 0x800000 -; VI-GISEL-NEXT: v_mov_b32_e32 v1, 0x4f800000 -; VI-GISEL-NEXT: v_mov_b32_e32 v3, 0x7f800000 +; VI-GISEL-NEXT: v_mov_b32_e32 v2, 0x7f800000 ; VI-GISEL-NEXT: s_waitcnt lgkmcnt(0) ; VI-GISEL-NEXT: v_cmp_lt_f32_e32 vcc, s6, v0 -; VI-GISEL-NEXT: v_cndmask_b32_e32 v2, 1.0, v1, vcc -; VI-GISEL-NEXT: v_mul_f32_e32 v2, s6, v2 -; VI-GISEL-NEXT: v_log_f32_e32 v2, v2 -; VI-GISEL-NEXT: v_and_b32_e32 v4, 0xfffff000, v2 -; VI-GISEL-NEXT: v_sub_f32_e32 v5, v2, v4 +; VI-GISEL-NEXT: v_cndmask_b32_e64 v1, 0, 1, vcc +; VI-GISEL-NEXT: v_lshlrev_b32_e32 v1, 5, v1 +; VI-GISEL-NEXT: v_ldexp_f32 v1, s6, v1 +; VI-GISEL-NEXT: v_log_f32_e32 v1, v1 +; VI-GISEL-NEXT: v_and_b32_e32 v3, 0xfffff000, v1 +; VI-GISEL-NEXT: v_sub_f32_e32 v4, v1, v3 +; VI-GISEL-NEXT: v_mul_f32_e32 v5, 0x369a84fb, v3 ; VI-GISEL-NEXT: v_mul_f32_e32 v6, 0x369a84fb, v4 -; VI-GISEL-NEXT: v_mul_f32_e32 v7, 0x369a84fb, v5 -; VI-GISEL-NEXT: v_mul_f32_e32 v5, 0x3e9a2000, v5 -; VI-GISEL-NEXT: v_add_f32_e32 v6, v6, v7 ; VI-GISEL-NEXT: v_mul_f32_e32 v4, 0x3e9a2000, v4 ; VI-GISEL-NEXT: v_add_f32_e32 v5, v5, v6 +; VI-GISEL-NEXT: v_mul_f32_e32 v3, 0x3e9a2000, v3 ; VI-GISEL-NEXT: v_add_f32_e32 v4, v4, v5 -; VI-GISEL-NEXT: v_cmp_lt_f32_e64 s[0:1], |v2|, v3 -; VI-GISEL-NEXT: v_cndmask_b32_e64 v2, v2, v4, s[0:1] +; VI-GISEL-NEXT: v_add_f32_e32 v3, v3, v4 +; VI-GISEL-NEXT: v_cmp_lt_f32_e64 s[0:1], |v1|, v2 +; VI-GISEL-NEXT: v_cndmask_b32_e64 v1, v1, v3, s[0:1] ; VI-GISEL-NEXT: v_cmp_lt_f32_e64 s[0:1], s7, v0 
-; VI-GISEL-NEXT: v_cndmask_b32_e64 v0, 1.0, v1, s[0:1] -; VI-GISEL-NEXT: v_mul_f32_e32 v0, s7, v0 -; VI-GISEL-NEXT: v_log_f32_e32 v1, v0 +; VI-GISEL-NEXT: v_cndmask_b32_e64 v0, 0, 1, s[0:1] +; VI-GISEL-NEXT: v_lshlrev_b32_e32 v0, 5, v0 +; VI-GISEL-NEXT: v_ldexp_f32 v0, s7, v0 +; VI-GISEL-NEXT: v_log_f32_e32 v3, v0 ; VI-GISEL-NEXT: v_mov_b32_e32 v4, 0x411a209b ; VI-GISEL-NEXT: v_cndmask_b32_e32 v0, 0, v4, vcc -; VI-GISEL-NEXT: v_sub_f32_e32 v0, v2, v0 -; VI-GISEL-NEXT: v_and_b32_e32 v2, 0xfffff000, v1 -; VI-GISEL-NEXT: v_sub_f32_e32 v5, v1, v2 +; VI-GISEL-NEXT: v_sub_f32_e32 v0, v1, v0 +; VI-GISEL-NEXT: v_and_b32_e32 v1, 0xfffff000, v3 +; VI-GISEL-NEXT: v_sub_f32_e32 v5, v3, v1 ; VI-GISEL-NEXT: v_mul_f32_e32 v6, 0x369a84fb, v5 -; VI-GISEL-NEXT: v_mul_f32_e32 v7, 0x369a84fb, v2 +; VI-GISEL-NEXT: v_mul_f32_e32 v7, 0x369a84fb, v1 ; VI-GISEL-NEXT: v_add_f32_e32 v6, v7, v6 ; VI-GISEL-NEXT: v_mul_f32_e32 v5, 0x3e9a2000, v5 ; VI-GISEL-NEXT: v_add_f32_e32 v5, v5, v6 -; VI-GISEL-NEXT: v_mul_f32_e32 v2, 0x3e9a2000, v2 -; VI-GISEL-NEXT: v_add_f32_e32 v2, v2, v5 -; VI-GISEL-NEXT: v_cmp_lt_f32_e64 vcc, |v1|, v3 -; VI-GISEL-NEXT: v_cndmask_b32_e32 v1, v1, v2, vcc +; VI-GISEL-NEXT: v_mul_f32_e32 v1, 0x3e9a2000, v1 +; VI-GISEL-NEXT: v_add_f32_e32 v1, v1, v5 +; VI-GISEL-NEXT: v_cmp_lt_f32_e64 vcc, |v3|, v2 +; VI-GISEL-NEXT: v_cndmask_b32_e32 v1, v3, v1, vcc ; VI-GISEL-NEXT: v_cndmask_b32_e64 v2, 0, v4, s[0:1] ; VI-GISEL-NEXT: v_sub_f32_e32 v1, v1, v2 ; VI-GISEL-NEXT: v_mov_b32_e32 v2, s4 @@ -531,37 +535,38 @@ define amdgpu_kernel void @s_log10_v2f32(ptr addrspace(1) %out, <2 x float> %in) ; GFX900-GISEL: ; %bb.0: ; GFX900-GISEL-NEXT: s_load_dwordx4 s[8:11], s[4:5], 0x24 ; GFX900-GISEL-NEXT: v_mov_b32_e32 v0, 0x800000 -; GFX900-GISEL-NEXT: v_mov_b32_e32 v1, 0x4f800000 ; GFX900-GISEL-NEXT: v_mov_b32_e32 v3, 0x3e9a209a ; GFX900-GISEL-NEXT: v_mov_b32_e32 v4, 0x3284fbcf +; GFX900-GISEL-NEXT: v_mov_b32_e32 v5, 0x7f800000 ; GFX900-GISEL-NEXT: s_waitcnt lgkmcnt(0) ; GFX900-GISEL-NEXT: 
v_cmp_lt_f32_e32 vcc, s10, v0 -; GFX900-GISEL-NEXT: v_cndmask_b32_e32 v2, 1.0, v1, vcc -; GFX900-GISEL-NEXT: v_mul_f32_e32 v2, s10, v2 -; GFX900-GISEL-NEXT: v_log_f32_e32 v2, v2 -; GFX900-GISEL-NEXT: v_mov_b32_e32 v5, 0x7f800000 -; GFX900-GISEL-NEXT: v_mul_f32_e32 v6, 0x3e9a209a, v2 -; GFX900-GISEL-NEXT: v_fma_f32 v7, v2, v3, -v6 -; GFX900-GISEL-NEXT: v_fma_f32 v7, v2, v4, v7 +; GFX900-GISEL-NEXT: v_cndmask_b32_e64 v1, 0, 1, vcc +; GFX900-GISEL-NEXT: v_lshlrev_b32_e32 v1, 5, v1 +; GFX900-GISEL-NEXT: v_ldexp_f32 v1, s10, v1 +; GFX900-GISEL-NEXT: v_log_f32_e32 v1, v1 +; GFX900-GISEL-NEXT: v_mov_b32_e32 v2, 0 +; GFX900-GISEL-NEXT: v_mul_f32_e32 v6, 0x3e9a209a, v1 +; GFX900-GISEL-NEXT: v_fma_f32 v7, v1, v3, -v6 +; GFX900-GISEL-NEXT: v_fma_f32 v7, v1, v4, v7 ; GFX900-GISEL-NEXT: v_add_f32_e32 v6, v6, v7 -; GFX900-GISEL-NEXT: v_cmp_lt_f32_e64 s[0:1], |v2|, v5 -; GFX900-GISEL-NEXT: v_cndmask_b32_e64 v2, v2, v6, s[0:1] +; GFX900-GISEL-NEXT: v_cmp_lt_f32_e64 s[0:1], |v1|, v5 +; GFX900-GISEL-NEXT: v_cndmask_b32_e64 v1, v1, v6, s[0:1] ; GFX900-GISEL-NEXT: v_cmp_lt_f32_e64 s[0:1], s11, v0 -; GFX900-GISEL-NEXT: v_cndmask_b32_e64 v0, 1.0, v1, s[0:1] -; GFX900-GISEL-NEXT: v_mul_f32_e32 v0, s11, v0 -; GFX900-GISEL-NEXT: v_log_f32_e32 v1, v0 -; GFX900-GISEL-NEXT: v_mov_b32_e32 v6, 0x411a209b -; GFX900-GISEL-NEXT: v_cndmask_b32_e32 v0, 0, v6, vcc -; GFX900-GISEL-NEXT: v_sub_f32_e32 v0, v2, v0 -; GFX900-GISEL-NEXT: v_mul_f32_e32 v2, 0x3e9a209a, v1 -; GFX900-GISEL-NEXT: v_fma_f32 v3, v1, v3, -v2 -; GFX900-GISEL-NEXT: v_fma_f32 v3, v1, v4, v3 -; GFX900-GISEL-NEXT: v_add_f32_e32 v2, v2, v3 -; GFX900-GISEL-NEXT: v_cmp_lt_f32_e64 vcc, |v1|, v5 -; GFX900-GISEL-NEXT: v_cndmask_b32_e32 v1, v1, v2, vcc -; GFX900-GISEL-NEXT: v_cndmask_b32_e64 v2, 0, v6, s[0:1] -; GFX900-GISEL-NEXT: v_sub_f32_e32 v1, v1, v2 -; GFX900-GISEL-NEXT: v_mov_b32_e32 v2, 0 +; GFX900-GISEL-NEXT: v_cndmask_b32_e64 v0, 0, 1, s[0:1] +; GFX900-GISEL-NEXT: v_lshlrev_b32_e32 v0, 5, v0 +; GFX900-GISEL-NEXT: v_ldexp_f32 v0, 
s11, v0 +; GFX900-GISEL-NEXT: v_log_f32_e32 v6, v0 +; GFX900-GISEL-NEXT: v_mov_b32_e32 v7, 0x411a209b +; GFX900-GISEL-NEXT: v_cndmask_b32_e32 v0, 0, v7, vcc +; GFX900-GISEL-NEXT: v_sub_f32_e32 v0, v1, v0 +; GFX900-GISEL-NEXT: v_mul_f32_e32 v1, 0x3e9a209a, v6 +; GFX900-GISEL-NEXT: v_fma_f32 v3, v6, v3, -v1 +; GFX900-GISEL-NEXT: v_fma_f32 v3, v6, v4, v3 +; GFX900-GISEL-NEXT: v_add_f32_e32 v1, v1, v3 +; GFX900-GISEL-NEXT: v_cmp_lt_f32_e64 vcc, |v6|, v5 +; GFX900-GISEL-NEXT: v_cndmask_b32_e32 v1, v6, v1, vcc +; GFX900-GISEL-NEXT: v_cndmask_b32_e64 v3, 0, v7, s[0:1] +; GFX900-GISEL-NEXT: v_sub_f32_e32 v1, v1, v3 ; GFX900-GISEL-NEXT: global_store_dwordx2 v2, v[0:1], s[8:9] ; GFX900-GISEL-NEXT: s_endpgm ; @@ -608,31 +613,37 @@ define amdgpu_kernel void @s_log10_v2f32(ptr addrspace(1) %out, <2 x float> %in) ; GFX1100-GISEL: ; %bb.0: ; GFX1100-GISEL-NEXT: s_load_b128 s[0:3], s[4:5], 0x24 ; GFX1100-GISEL-NEXT: s_waitcnt lgkmcnt(0) -; GFX1100-GISEL-NEXT: v_cmp_gt_f32_e64 s4, 0x800000, s2 ; GFX1100-GISEL-NEXT: v_cmp_gt_f32_e64 s5, 0x800000, s3 +; GFX1100-GISEL-NEXT: v_cmp_gt_f32_e64 s4, 0x800000, s2 ; GFX1100-GISEL-NEXT: s_delay_alu instid0(VALU_DEP_2) | instskip(NEXT) | instid1(VALU_DEP_2) -; GFX1100-GISEL-NEXT: v_cndmask_b32_e64 v0, 1.0, 0x4f800000, s4 -; GFX1100-GISEL-NEXT: v_cndmask_b32_e64 v1, 1.0, 0x4f800000, s5 -; GFX1100-GISEL-NEXT: s_delay_alu instid0(VALU_DEP_1) | instskip(NEXT) | instid1(VALU_DEP_1) -; GFX1100-GISEL-NEXT: v_dual_mul_f32 v0, s2, v0 :: v_dual_mul_f32 v1, s3, v1 -; GFX1100-GISEL-NEXT: v_log_f32_e32 v0, v0 -; GFX1100-GISEL-NEXT: s_delay_alu instid0(VALU_DEP_1) | instskip(SKIP_3) | instid1(VALU_DEP_2) +; GFX1100-GISEL-NEXT: v_cndmask_b32_e64 v1, 0, 1, s5 +; GFX1100-GISEL-NEXT: v_cndmask_b32_e64 v0, 0, 1, s4 +; GFX1100-GISEL-NEXT: s_delay_alu instid0(VALU_DEP_2) | instskip(NEXT) | instid1(VALU_DEP_1) +; GFX1100-GISEL-NEXT: v_lshlrev_b32_e32 v1, 5, v1 +; GFX1100-GISEL-NEXT: v_ldexp_f32 v1, s3, v1 +; GFX1100-GISEL-NEXT: s_delay_alu instid0(VALU_DEP_1) | 
instskip(SKIP_2) | instid1(VALU_DEP_1) ; GFX1100-GISEL-NEXT: v_log_f32_e32 v1, v1 ; GFX1100-GISEL-NEXT: s_waitcnt_depctr 0xfff -; GFX1100-GISEL-NEXT: v_dual_mul_f32 v2, 0x3e9a209a, v0 :: v_dual_mul_f32 v3, 0x3e9a209a, v1 +; GFX1100-GISEL-NEXT: v_dual_mul_f32 v3, 0x3e9a209a, v1 :: v_dual_lshlrev_b32 v0, 5, v0 +; GFX1100-GISEL-NEXT: v_ldexp_f32 v0, s2, v0 +; GFX1100-GISEL-NEXT: s_delay_alu instid0(VALU_DEP_2) | instskip(NEXT) | instid1(VALU_DEP_2) +; GFX1100-GISEL-NEXT: v_fma_f32 v5, 0x3e9a209a, v1, -v3 +; GFX1100-GISEL-NEXT: v_log_f32_e32 v0, v0 +; GFX1100-GISEL-NEXT: s_delay_alu instid0(VALU_DEP_1) | instskip(SKIP_4) | instid1(VALU_DEP_3) +; GFX1100-GISEL-NEXT: v_fmac_f32_e32 v5, 0x3284fbcf, v1 +; GFX1100-GISEL-NEXT: s_waitcnt_depctr 0xfff +; GFX1100-GISEL-NEXT: v_dual_add_f32 v3, v3, v5 :: v_dual_mul_f32 v2, 0x3e9a209a, v0 ; GFX1100-GISEL-NEXT: v_cmp_gt_f32_e64 vcc_lo, 0x7f800000, |v0| +; GFX1100-GISEL-NEXT: v_cndmask_b32_e64 v5, 0, 0x411a209b, s5 ; GFX1100-GISEL-NEXT: v_fma_f32 v4, 0x3e9a209a, v0, -v2 -; GFX1100-GISEL-NEXT: s_delay_alu instid0(VALU_DEP_3) | instskip(NEXT) | instid1(VALU_DEP_1) -; GFX1100-GISEL-NEXT: v_fma_f32 v5, 0x3e9a209a, v1, -v3 -; GFX1100-GISEL-NEXT: v_dual_fmac_f32 v4, 0x3284fbcf, v0 :: v_dual_fmac_f32 v5, 0x3284fbcf, v1 -; GFX1100-GISEL-NEXT: s_delay_alu instid0(VALU_DEP_1) | instskip(SKIP_2) | instid1(VALU_DEP_3) -; GFX1100-GISEL-NEXT: v_dual_add_f32 v2, v2, v4 :: v_dual_add_f32 v3, v3, v5 +; GFX1100-GISEL-NEXT: s_delay_alu instid0(VALU_DEP_1) | instskip(NEXT) | instid1(VALU_DEP_1) +; GFX1100-GISEL-NEXT: v_fmac_f32_e32 v4, 0x3284fbcf, v0 +; GFX1100-GISEL-NEXT: v_add_f32_e32 v2, v2, v4 ; GFX1100-GISEL-NEXT: v_cndmask_b32_e64 v4, 0, 0x411a209b, s4 -; GFX1100-GISEL-NEXT: v_cndmask_b32_e64 v5, 0, 0x411a209b, s5 +; GFX1100-GISEL-NEXT: s_delay_alu instid0(VALU_DEP_2) | instskip(SKIP_2) | instid1(VALU_DEP_1) ; GFX1100-GISEL-NEXT: v_cndmask_b32_e32 v0, v0, v2, vcc_lo ; GFX1100-GISEL-NEXT: v_cmp_gt_f32_e64 vcc_lo, 0x7f800000, |v1| -; 
GFX1100-GISEL-NEXT: v_dual_mov_b32 v2, 0 :: v_dual_cndmask_b32 v1, v1, v3 -; GFX1100-GISEL-NEXT: s_delay_alu instid0(VALU_DEP_1) +; GFX1100-GISEL-NEXT: v_dual_cndmask_b32 v1, v1, v3 :: v_dual_mov_b32 v2, 0 ; GFX1100-GISEL-NEXT: v_dual_sub_f32 v0, v0, v4 :: v_dual_sub_f32 v1, v1, v5 ; GFX1100-GISEL-NEXT: global_store_b64 v2, v[0:1], s[0:1] ; GFX1100-GISEL-NEXT: s_endpgm @@ -808,49 +819,51 @@ define amdgpu_kernel void @s_log10_v3f32(ptr addrspace(1) %out, <3 x float> %in) ; SI-GISEL-NEXT: s_load_dwordx4 s[8:11], s[4:5], 0xd ; SI-GISEL-NEXT: s_load_dwordx2 s[4:5], s[4:5], 0x9 ; SI-GISEL-NEXT: v_mov_b32_e32 v1, 0x800000 -; SI-GISEL-NEXT: v_mov_b32_e32 v2, 0x4f800000 -; SI-GISEL-NEXT: v_mov_b32_e32 v3, 0x3e9a209a +; SI-GISEL-NEXT: v_mov_b32_e32 v2, 0x3e9a209a +; SI-GISEL-NEXT: v_mov_b32_e32 v3, 0x3284fbcf ; SI-GISEL-NEXT: s_waitcnt lgkmcnt(0) ; SI-GISEL-NEXT: v_cmp_lt_f32_e32 vcc, s8, v1 -; SI-GISEL-NEXT: v_cndmask_b32_e32 v0, 1.0, v2, vcc -; SI-GISEL-NEXT: v_mul_f32_e32 v0, s8, v0 +; SI-GISEL-NEXT: v_cndmask_b32_e64 v0, 0, 1, vcc +; SI-GISEL-NEXT: v_lshlrev_b32_e32 v0, 5, v0 +; SI-GISEL-NEXT: v_ldexp_f32_e32 v0, s8, v0 ; SI-GISEL-NEXT: v_log_f32_e32 v0, v0 -; SI-GISEL-NEXT: v_mov_b32_e32 v4, 0x3284fbcf -; SI-GISEL-NEXT: v_mov_b32_e32 v5, 0x7f800000 +; SI-GISEL-NEXT: v_mov_b32_e32 v4, 0x7f800000 ; SI-GISEL-NEXT: s_mov_b32 s6, -1 -; SI-GISEL-NEXT: v_mul_f32_e32 v6, 0x3e9a209a, v0 -; SI-GISEL-NEXT: v_fma_f32 v7, v0, v3, -v6 -; SI-GISEL-NEXT: v_fma_f32 v7, v0, v4, v7 -; SI-GISEL-NEXT: v_add_f32_e32 v6, v6, v7 -; SI-GISEL-NEXT: v_cmp_lt_f32_e64 s[0:1], |v0|, v5 -; SI-GISEL-NEXT: v_cndmask_b32_e64 v0, v0, v6, s[0:1] +; SI-GISEL-NEXT: s_mov_b32 s7, 0xf000 +; SI-GISEL-NEXT: v_mul_f32_e32 v5, 0x3e9a209a, v0 +; SI-GISEL-NEXT: v_fma_f32 v6, v0, v2, -v5 +; SI-GISEL-NEXT: v_fma_f32 v6, v0, v3, v6 +; SI-GISEL-NEXT: v_add_f32_e32 v5, v5, v6 +; SI-GISEL-NEXT: v_cmp_lt_f32_e64 s[0:1], |v0|, v4 +; SI-GISEL-NEXT: v_cndmask_b32_e64 v0, v0, v5, s[0:1] ; SI-GISEL-NEXT: v_cmp_lt_f32_e64 
s[0:1], s9, v1 -; SI-GISEL-NEXT: v_cndmask_b32_e64 v6, 1.0, v2, s[0:1] -; SI-GISEL-NEXT: v_mul_f32_e32 v6, s9, v6 -; SI-GISEL-NEXT: v_log_f32_e32 v6, v6 -; SI-GISEL-NEXT: v_mov_b32_e32 v7, 0x411a209b -; SI-GISEL-NEXT: v_cndmask_b32_e32 v8, 0, v7, vcc +; SI-GISEL-NEXT: v_cndmask_b32_e64 v5, 0, 1, s[0:1] +; SI-GISEL-NEXT: v_lshlrev_b32_e32 v5, 5, v5 +; SI-GISEL-NEXT: v_ldexp_f32_e32 v5, s9, v5 +; SI-GISEL-NEXT: v_log_f32_e32 v5, v5 +; SI-GISEL-NEXT: v_mov_b32_e32 v6, 0x411a209b +; SI-GISEL-NEXT: v_cndmask_b32_e32 v7, 0, v6, vcc ; SI-GISEL-NEXT: v_cmp_lt_f32_e32 vcc, s10, v1 -; SI-GISEL-NEXT: v_cndmask_b32_e32 v1, 1.0, v2, vcc -; SI-GISEL-NEXT: v_sub_f32_e32 v0, v0, v8 -; SI-GISEL-NEXT: v_mul_f32_e32 v8, 0x3e9a209a, v6 -; SI-GISEL-NEXT: v_mul_f32_e32 v1, s10, v1 -; SI-GISEL-NEXT: v_fma_f32 v9, v6, v3, -v8 -; SI-GISEL-NEXT: v_log_f32_e32 v2, v1 -; SI-GISEL-NEXT: v_fma_f32 v9, v6, v4, v9 -; SI-GISEL-NEXT: v_add_f32_e32 v8, v8, v9 -; SI-GISEL-NEXT: v_cmp_lt_f32_e64 s[2:3], |v6|, v5 -; SI-GISEL-NEXT: v_cndmask_b32_e64 v1, v6, v8, s[2:3] -; SI-GISEL-NEXT: v_cndmask_b32_e64 v6, 0, v7, s[0:1] -; SI-GISEL-NEXT: v_sub_f32_e32 v1, v1, v6 -; SI-GISEL-NEXT: v_mul_f32_e32 v6, 0x3e9a209a, v2 -; SI-GISEL-NEXT: v_fma_f32 v3, v2, v3, -v6 -; SI-GISEL-NEXT: v_fma_f32 v3, v2, v4, v3 -; SI-GISEL-NEXT: v_add_f32_e32 v3, v6, v3 -; SI-GISEL-NEXT: v_cmp_lt_f32_e64 s[0:1], |v2|, v5 -; SI-GISEL-NEXT: v_cndmask_b32_e64 v2, v2, v3, s[0:1] -; SI-GISEL-NEXT: v_cndmask_b32_e32 v3, 0, v7, vcc -; SI-GISEL-NEXT: s_mov_b32 s7, 0xf000 +; SI-GISEL-NEXT: v_sub_f32_e32 v0, v0, v7 +; SI-GISEL-NEXT: v_mul_f32_e32 v7, 0x3e9a209a, v5 +; SI-GISEL-NEXT: v_cndmask_b32_e64 v1, 0, 1, vcc +; SI-GISEL-NEXT: v_fma_f32 v8, v5, v2, -v7 +; SI-GISEL-NEXT: v_lshlrev_b32_e32 v1, 5, v1 +; SI-GISEL-NEXT: v_fma_f32 v8, v5, v3, v8 +; SI-GISEL-NEXT: v_ldexp_f32_e32 v1, s10, v1 +; SI-GISEL-NEXT: v_add_f32_e32 v7, v7, v8 +; SI-GISEL-NEXT: v_log_f32_e32 v8, v1 +; SI-GISEL-NEXT: v_cmp_lt_f32_e64 s[2:3], |v5|, v4 +; SI-GISEL-NEXT: 
v_cndmask_b32_e64 v1, v5, v7, s[2:3] +; SI-GISEL-NEXT: v_cndmask_b32_e64 v5, 0, v6, s[0:1] +; SI-GISEL-NEXT: v_sub_f32_e32 v1, v1, v5 +; SI-GISEL-NEXT: v_mul_f32_e32 v5, 0x3e9a209a, v8 +; SI-GISEL-NEXT: v_fma_f32 v2, v8, v2, -v5 +; SI-GISEL-NEXT: v_fma_f32 v2, v8, v3, v2 +; SI-GISEL-NEXT: v_add_f32_e32 v2, v5, v2 +; SI-GISEL-NEXT: v_cmp_lt_f32_e64 s[0:1], |v8|, v4 +; SI-GISEL-NEXT: v_cndmask_b32_e64 v2, v8, v2, s[0:1] +; SI-GISEL-NEXT: v_cndmask_b32_e32 v3, 0, v6, vcc ; SI-GISEL-NEXT: v_sub_f32_e32 v2, v2, v3 ; SI-GISEL-NEXT: buffer_store_dwordx2 v[0:1], off, s[4:7], 0 ; SI-GISEL-NEXT: buffer_store_dword v2, off, s[4:7], 0 offset:8 @@ -927,12 +940,13 @@ define amdgpu_kernel void @s_log10_v3f32(ptr addrspace(1) %out, <3 x float> %in) ; VI-GISEL: ; %bb.0: ; VI-GISEL-NEXT: s_load_dwordx4 s[8:11], s[4:5], 0x34 ; VI-GISEL-NEXT: v_mov_b32_e32 v1, 0x800000 -; VI-GISEL-NEXT: v_mov_b32_e32 v2, 0x4f800000 +; VI-GISEL-NEXT: v_mov_b32_e32 v2, 0x7f800000 ; VI-GISEL-NEXT: s_load_dwordx2 s[4:5], s[4:5], 0x24 ; VI-GISEL-NEXT: s_waitcnt lgkmcnt(0) ; VI-GISEL-NEXT: v_cmp_lt_f32_e32 vcc, s8, v1 -; VI-GISEL-NEXT: v_cndmask_b32_e32 v0, 1.0, v2, vcc -; VI-GISEL-NEXT: v_mul_f32_e32 v0, s8, v0 +; VI-GISEL-NEXT: v_cndmask_b32_e64 v0, 0, 1, vcc +; VI-GISEL-NEXT: v_lshlrev_b32_e32 v0, 5, v0 +; VI-GISEL-NEXT: v_ldexp_f32 v0, s8, v0 ; VI-GISEL-NEXT: v_log_f32_e32 v0, v0 ; VI-GISEL-NEXT: v_and_b32_e32 v3, 0xfffff000, v0 ; VI-GISEL-NEXT: v_sub_f32_e32 v4, v0, v3 @@ -943,45 +957,46 @@ define amdgpu_kernel void @s_log10_v3f32(ptr addrspace(1) %out, <3 x float> %in) ; VI-GISEL-NEXT: v_mul_f32_e32 v3, 0x3e9a2000, v3 ; VI-GISEL-NEXT: v_add_f32_e32 v4, v4, v5 ; VI-GISEL-NEXT: v_add_f32_e32 v3, v3, v4 -; VI-GISEL-NEXT: v_mov_b32_e32 v4, 0x7f800000 -; VI-GISEL-NEXT: v_cmp_lt_f32_e64 s[0:1], |v0|, v4 +; VI-GISEL-NEXT: v_cmp_lt_f32_e64 s[0:1], |v0|, v2 ; VI-GISEL-NEXT: v_cndmask_b32_e64 v0, v0, v3, s[0:1] ; VI-GISEL-NEXT: v_cmp_lt_f32_e64 s[0:1], s9, v1 -; VI-GISEL-NEXT: v_cndmask_b32_e64 v3, 1.0, v2, 
s[0:1] -; VI-GISEL-NEXT: v_mul_f32_e32 v3, s9, v3 +; VI-GISEL-NEXT: v_cndmask_b32_e64 v3, 0, 1, s[0:1] +; VI-GISEL-NEXT: v_lshlrev_b32_e32 v3, 5, v3 +; VI-GISEL-NEXT: v_ldexp_f32 v3, s9, v3 ; VI-GISEL-NEXT: v_log_f32_e32 v3, v3 -; VI-GISEL-NEXT: v_mov_b32_e32 v5, 0x411a209b -; VI-GISEL-NEXT: v_cndmask_b32_e32 v6, 0, v5, vcc -; VI-GISEL-NEXT: v_sub_f32_e32 v0, v0, v6 -; VI-GISEL-NEXT: v_and_b32_e32 v6, 0xfffff000, v3 +; VI-GISEL-NEXT: v_mov_b32_e32 v4, 0x411a209b +; VI-GISEL-NEXT: v_cndmask_b32_e32 v5, 0, v4, vcc +; VI-GISEL-NEXT: v_sub_f32_e32 v0, v0, v5 +; VI-GISEL-NEXT: v_and_b32_e32 v5, 0xfffff000, v3 +; VI-GISEL-NEXT: v_sub_f32_e32 v6, v3, v5 ; VI-GISEL-NEXT: v_cmp_lt_f32_e32 vcc, s10, v1 -; VI-GISEL-NEXT: v_sub_f32_e32 v7, v3, v6 -; VI-GISEL-NEXT: v_cndmask_b32_e32 v1, 1.0, v2, vcc -; VI-GISEL-NEXT: v_mul_f32_e32 v8, 0x369a84fb, v7 -; VI-GISEL-NEXT: v_mul_f32_e32 v9, 0x369a84fb, v6 -; VI-GISEL-NEXT: v_mul_f32_e32 v1, s10, v1 -; VI-GISEL-NEXT: v_add_f32_e32 v8, v9, v8 -; VI-GISEL-NEXT: v_mul_f32_e32 v7, 0x3e9a2000, v7 -; VI-GISEL-NEXT: v_log_f32_e32 v2, v1 -; VI-GISEL-NEXT: v_add_f32_e32 v7, v7, v8 +; VI-GISEL-NEXT: v_mul_f32_e32 v7, 0x369a84fb, v6 +; VI-GISEL-NEXT: v_mul_f32_e32 v8, 0x369a84fb, v5 +; VI-GISEL-NEXT: v_cndmask_b32_e64 v1, 0, 1, vcc +; VI-GISEL-NEXT: v_add_f32_e32 v7, v8, v7 ; VI-GISEL-NEXT: v_mul_f32_e32 v6, 0x3e9a2000, v6 +; VI-GISEL-NEXT: v_lshlrev_b32_e32 v1, 5, v1 ; VI-GISEL-NEXT: v_add_f32_e32 v6, v6, v7 -; VI-GISEL-NEXT: v_cmp_lt_f32_e64 s[2:3], |v3|, v4 -; VI-GISEL-NEXT: v_cndmask_b32_e64 v1, v3, v6, s[2:3] -; VI-GISEL-NEXT: v_cndmask_b32_e64 v3, 0, v5, s[0:1] +; VI-GISEL-NEXT: v_mul_f32_e32 v5, 0x3e9a2000, v5 +; VI-GISEL-NEXT: v_ldexp_f32 v1, s10, v1 +; VI-GISEL-NEXT: v_add_f32_e32 v5, v5, v6 +; VI-GISEL-NEXT: v_log_f32_e32 v6, v1 +; VI-GISEL-NEXT: v_cmp_lt_f32_e64 s[2:3], |v3|, v2 +; VI-GISEL-NEXT: v_cndmask_b32_e64 v1, v3, v5, s[2:3] +; VI-GISEL-NEXT: v_cndmask_b32_e64 v3, 0, v4, s[0:1] ; VI-GISEL-NEXT: v_sub_f32_e32 v1, v1, v3 -; 
VI-GISEL-NEXT: v_and_b32_e32 v3, 0xfffff000, v2 -; VI-GISEL-NEXT: v_sub_f32_e32 v6, v2, v3 -; VI-GISEL-NEXT: v_mul_f32_e32 v7, 0x369a84fb, v6 +; VI-GISEL-NEXT: v_and_b32_e32 v3, 0xfffff000, v6 +; VI-GISEL-NEXT: v_sub_f32_e32 v5, v6, v3 +; VI-GISEL-NEXT: v_mul_f32_e32 v7, 0x369a84fb, v5 ; VI-GISEL-NEXT: v_mul_f32_e32 v8, 0x369a84fb, v3 ; VI-GISEL-NEXT: v_add_f32_e32 v7, v8, v7 -; VI-GISEL-NEXT: v_mul_f32_e32 v6, 0x3e9a2000, v6 -; VI-GISEL-NEXT: v_add_f32_e32 v6, v6, v7 +; VI-GISEL-NEXT: v_mul_f32_e32 v5, 0x3e9a2000, v5 +; VI-GISEL-NEXT: v_add_f32_e32 v5, v5, v7 ; VI-GISEL-NEXT: v_mul_f32_e32 v3, 0x3e9a2000, v3 -; VI-GISEL-NEXT: v_add_f32_e32 v3, v3, v6 -; VI-GISEL-NEXT: v_cmp_lt_f32_e64 s[0:1], |v2|, v4 -; VI-GISEL-NEXT: v_cndmask_b32_e64 v2, v2, v3, s[0:1] -; VI-GISEL-NEXT: v_cndmask_b32_e32 v3, 0, v5, vcc +; VI-GISEL-NEXT: v_add_f32_e32 v3, v3, v5 +; VI-GISEL-NEXT: v_cmp_lt_f32_e64 s[0:1], |v6|, v2 +; VI-GISEL-NEXT: v_cndmask_b32_e64 v2, v6, v3, s[0:1] +; VI-GISEL-NEXT: v_cndmask_b32_e32 v3, 0, v4, vcc ; VI-GISEL-NEXT: v_sub_f32_e32 v2, v2, v3 ; VI-GISEL-NEXT: v_mov_b32_e32 v3, s4 ; VI-GISEL-NEXT: v_mov_b32_e32 v4, s5 @@ -1046,49 +1061,51 @@ define amdgpu_kernel void @s_log10_v3f32(ptr addrspace(1) %out, <3 x float> %in) ; GFX900-GISEL-NEXT: s_load_dwordx4 s[8:11], s[4:5], 0x34 ; GFX900-GISEL-NEXT: s_load_dwordx2 s[6:7], s[4:5], 0x24 ; GFX900-GISEL-NEXT: v_mov_b32_e32 v1, 0x800000 -; GFX900-GISEL-NEXT: v_mov_b32_e32 v2, 0x4f800000 -; GFX900-GISEL-NEXT: v_mov_b32_e32 v3, 0x3e9a209a +; GFX900-GISEL-NEXT: v_mov_b32_e32 v2, 0x3e9a209a +; GFX900-GISEL-NEXT: v_mov_b32_e32 v4, 0x3284fbcf ; GFX900-GISEL-NEXT: s_waitcnt lgkmcnt(0) ; GFX900-GISEL-NEXT: v_cmp_lt_f32_e32 vcc, s8, v1 -; GFX900-GISEL-NEXT: v_cndmask_b32_e32 v0, 1.0, v2, vcc -; GFX900-GISEL-NEXT: v_mul_f32_e32 v0, s8, v0 +; GFX900-GISEL-NEXT: v_cndmask_b32_e64 v0, 0, 1, vcc +; GFX900-GISEL-NEXT: v_lshlrev_b32_e32 v0, 5, v0 +; GFX900-GISEL-NEXT: v_ldexp_f32 v0, s8, v0 ; GFX900-GISEL-NEXT: v_log_f32_e32 v0, v0 -; 
GFX900-GISEL-NEXT: v_mov_b32_e32 v4, 0x3284fbcf ; GFX900-GISEL-NEXT: v_mov_b32_e32 v5, 0x7f800000 +; GFX900-GISEL-NEXT: v_mov_b32_e32 v3, 0 ; GFX900-GISEL-NEXT: v_mul_f32_e32 v6, 0x3e9a209a, v0 -; GFX900-GISEL-NEXT: v_fma_f32 v7, v0, v3, -v6 +; GFX900-GISEL-NEXT: v_fma_f32 v7, v0, v2, -v6 ; GFX900-GISEL-NEXT: v_fma_f32 v7, v0, v4, v7 ; GFX900-GISEL-NEXT: v_add_f32_e32 v6, v6, v7 ; GFX900-GISEL-NEXT: v_cmp_lt_f32_e64 s[0:1], |v0|, v5 ; GFX900-GISEL-NEXT: v_cndmask_b32_e64 v0, v0, v6, s[0:1] ; GFX900-GISEL-NEXT: v_cmp_lt_f32_e64 s[0:1], s9, v1 -; GFX900-GISEL-NEXT: v_cndmask_b32_e64 v6, 1.0, v2, s[0:1] -; GFX900-GISEL-NEXT: v_mul_f32_e32 v6, s9, v6 +; GFX900-GISEL-NEXT: v_cndmask_b32_e64 v6, 0, 1, s[0:1] +; GFX900-GISEL-NEXT: v_lshlrev_b32_e32 v6, 5, v6 +; GFX900-GISEL-NEXT: v_ldexp_f32 v6, s9, v6 ; GFX900-GISEL-NEXT: v_log_f32_e32 v6, v6 ; GFX900-GISEL-NEXT: v_mov_b32_e32 v7, 0x411a209b ; GFX900-GISEL-NEXT: v_cndmask_b32_e32 v8, 0, v7, vcc ; GFX900-GISEL-NEXT: v_cmp_lt_f32_e32 vcc, s10, v1 -; GFX900-GISEL-NEXT: v_cndmask_b32_e32 v1, 1.0, v2, vcc ; GFX900-GISEL-NEXT: v_sub_f32_e32 v0, v0, v8 ; GFX900-GISEL-NEXT: v_mul_f32_e32 v8, 0x3e9a209a, v6 -; GFX900-GISEL-NEXT: v_mul_f32_e32 v1, s10, v1 -; GFX900-GISEL-NEXT: v_fma_f32 v9, v6, v3, -v8 -; GFX900-GISEL-NEXT: v_log_f32_e32 v2, v1 +; GFX900-GISEL-NEXT: v_cndmask_b32_e64 v1, 0, 1, vcc +; GFX900-GISEL-NEXT: v_fma_f32 v9, v6, v2, -v8 +; GFX900-GISEL-NEXT: v_lshlrev_b32_e32 v1, 5, v1 ; GFX900-GISEL-NEXT: v_fma_f32 v9, v6, v4, v9 +; GFX900-GISEL-NEXT: v_ldexp_f32 v1, s10, v1 ; GFX900-GISEL-NEXT: v_add_f32_e32 v8, v8, v9 +; GFX900-GISEL-NEXT: v_log_f32_e32 v9, v1 ; GFX900-GISEL-NEXT: v_cmp_lt_f32_e64 s[2:3], |v6|, v5 ; GFX900-GISEL-NEXT: v_cndmask_b32_e64 v1, v6, v8, s[2:3] ; GFX900-GISEL-NEXT: v_cndmask_b32_e64 v6, 0, v7, s[0:1] ; GFX900-GISEL-NEXT: v_sub_f32_e32 v1, v1, v6 -; GFX900-GISEL-NEXT: v_mul_f32_e32 v6, 0x3e9a209a, v2 -; GFX900-GISEL-NEXT: v_fma_f32 v3, v2, v3, -v6 -; GFX900-GISEL-NEXT: v_fma_f32 v3, v2, v4, v3 
-; GFX900-GISEL-NEXT: v_add_f32_e32 v3, v6, v3 -; GFX900-GISEL-NEXT: v_cmp_lt_f32_e64 s[0:1], |v2|, v5 -; GFX900-GISEL-NEXT: v_cndmask_b32_e64 v2, v2, v3, s[0:1] -; GFX900-GISEL-NEXT: v_cndmask_b32_e32 v3, 0, v7, vcc -; GFX900-GISEL-NEXT: v_sub_f32_e32 v2, v2, v3 -; GFX900-GISEL-NEXT: v_mov_b32_e32 v3, 0 +; GFX900-GISEL-NEXT: v_mul_f32_e32 v6, 0x3e9a209a, v9 +; GFX900-GISEL-NEXT: v_fma_f32 v2, v9, v2, -v6 +; GFX900-GISEL-NEXT: v_fma_f32 v2, v9, v4, v2 +; GFX900-GISEL-NEXT: v_add_f32_e32 v2, v6, v2 +; GFX900-GISEL-NEXT: v_cmp_lt_f32_e64 s[0:1], |v9|, v5 +; GFX900-GISEL-NEXT: v_cndmask_b32_e64 v2, v9, v2, s[0:1] +; GFX900-GISEL-NEXT: v_cndmask_b32_e32 v4, 0, v7, vcc +; GFX900-GISEL-NEXT: v_sub_f32_e32 v2, v2, v4 ; GFX900-GISEL-NEXT: global_store_dwordx3 v3, v[0:2], s[6:7] ; GFX900-GISEL-NEXT: s_endpgm ; @@ -1156,49 +1173,55 @@ define amdgpu_kernel void @s_log10_v3f32(ptr addrspace(1) %out, <3 x float> %in) ; GFX1100-GISEL: ; %bb.0: ; GFX1100-GISEL-NEXT: s_load_b128 s[0:3], s[4:5], 0x34 ; GFX1100-GISEL-NEXT: s_waitcnt lgkmcnt(0) +; GFX1100-GISEL-NEXT: v_cmp_gt_f32_e64 s7, 0x800000, s2 ; GFX1100-GISEL-NEXT: v_cmp_gt_f32_e64 s3, 0x800000, s0 ; GFX1100-GISEL-NEXT: v_cmp_gt_f32_e64 s6, 0x800000, s1 -; GFX1100-GISEL-NEXT: v_cmp_gt_f32_e64 s7, 0x800000, s2 ; GFX1100-GISEL-NEXT: s_delay_alu instid0(VALU_DEP_3) | instskip(NEXT) | instid1(VALU_DEP_3) -; GFX1100-GISEL-NEXT: v_cndmask_b32_e64 v0, 1.0, 0x4f800000, s3 -; GFX1100-GISEL-NEXT: v_cndmask_b32_e64 v1, 1.0, 0x4f800000, s6 -; GFX1100-GISEL-NEXT: s_delay_alu instid0(VALU_DEP_3) | instskip(SKIP_2) | instid1(VALU_DEP_4) -; GFX1100-GISEL-NEXT: v_cndmask_b32_e64 v2, 1.0, 0x4f800000, s7 -; GFX1100-GISEL-NEXT: v_cndmask_b32_e64 v10, 0, 0x411a209b, s6 +; GFX1100-GISEL-NEXT: v_cndmask_b32_e64 v2, 0, 1, s7 +; GFX1100-GISEL-NEXT: v_cndmask_b32_e64 v0, 0, 1, s3 +; GFX1100-GISEL-NEXT: s_delay_alu instid0(VALU_DEP_3) | instskip(SKIP_3) | instid1(VALU_DEP_1) +; GFX1100-GISEL-NEXT: v_cndmask_b32_e64 v1, 0, 1, s6 ; GFX1100-GISEL-NEXT: 
v_cndmask_b32_e64 v9, 0, 0x411a209b, s3 -; GFX1100-GISEL-NEXT: v_dual_mul_f32 v0, s0, v0 :: v_dual_mul_f32 v1, s1, v1 -; GFX1100-GISEL-NEXT: s_load_b64 s[0:1], s[4:5], 0x24 -; GFX1100-GISEL-NEXT: s_delay_alu instid0(VALU_DEP_1) | instskip(SKIP_1) | instid1(VALU_DEP_2) +; GFX1100-GISEL-NEXT: v_cndmask_b32_e64 v10, 0, 0x411a209b, s6 +; GFX1100-GISEL-NEXT: v_lshlrev_b32_e32 v2, 5, v2 +; GFX1100-GISEL-NEXT: v_ldexp_f32 v2, s2, v2 +; GFX1100-GISEL-NEXT: s_delay_alu instid0(VALU_DEP_1) | instskip(SKIP_2) | instid1(VALU_DEP_1) +; GFX1100-GISEL-NEXT: v_log_f32_e32 v2, v2 +; GFX1100-GISEL-NEXT: s_waitcnt_depctr 0xfff +; GFX1100-GISEL-NEXT: v_dual_mul_f32 v5, 0x3e9a209a, v2 :: v_dual_lshlrev_b32 v0, 5, v0 +; GFX1100-GISEL-NEXT: v_ldexp_f32 v0, s0, v0 +; GFX1100-GISEL-NEXT: s_delay_alu instid0(VALU_DEP_2) | instskip(NEXT) | instid1(VALU_DEP_2) +; GFX1100-GISEL-NEXT: v_fma_f32 v8, 0x3e9a209a, v2, -v5 ; GFX1100-GISEL-NEXT: v_log_f32_e32 v0, v0 -; GFX1100-GISEL-NEXT: v_mul_f32_e32 v2, s2, v2 -; GFX1100-GISEL-NEXT: v_log_f32_e32 v1, v1 +; GFX1100-GISEL-NEXT: s_delay_alu instid0(VALU_DEP_1) | instskip(NEXT) | instid1(VALU_DEP_1) +; GFX1100-GISEL-NEXT: v_fmac_f32_e32 v8, 0x3284fbcf, v2 +; GFX1100-GISEL-NEXT: v_add_f32_e32 v5, v5, v8 ; GFX1100-GISEL-NEXT: s_waitcnt_depctr 0xfff ; GFX1100-GISEL-NEXT: v_mul_f32_e32 v3, 0x3e9a209a, v0 -; GFX1100-GISEL-NEXT: v_log_f32_e32 v2, v2 -; GFX1100-GISEL-NEXT: v_mul_f32_e32 v4, 0x3e9a209a, v1 ; GFX1100-GISEL-NEXT: v_cmp_gt_f32_e64 vcc_lo, 0x7f800000, |v0| -; GFX1100-GISEL-NEXT: s_delay_alu instid0(VALU_DEP_3) | instskip(SKIP_4) | instid1(VALU_DEP_3) +; GFX1100-GISEL-NEXT: s_delay_alu instid0(VALU_DEP_2) | instskip(NEXT) | instid1(VALU_DEP_1) ; GFX1100-GISEL-NEXT: v_fma_f32 v6, 0x3e9a209a, v0, -v3 +; GFX1100-GISEL-NEXT: v_dual_fmac_f32 v6, 0x3284fbcf, v0 :: v_dual_lshlrev_b32 v1, 5, v1 +; GFX1100-GISEL-NEXT: s_delay_alu instid0(VALU_DEP_1) | instskip(SKIP_1) | instid1(VALU_DEP_2) +; GFX1100-GISEL-NEXT: v_ldexp_f32 v1, s1, v1 +; 
GFX1100-GISEL-NEXT: s_load_b64 s[0:1], s[4:5], 0x24 +; GFX1100-GISEL-NEXT: v_add_f32_e32 v3, v3, v6 +; GFX1100-GISEL-NEXT: s_delay_alu instid0(VALU_DEP_2) | instskip(SKIP_3) | instid1(VALU_DEP_1) +; GFX1100-GISEL-NEXT: v_log_f32_e32 v1, v1 +; GFX1100-GISEL-NEXT: v_cndmask_b32_e64 v6, 0, 0x411a209b, s7 ; GFX1100-GISEL-NEXT: s_waitcnt_depctr 0xfff -; GFX1100-GISEL-NEXT: v_mul_f32_e32 v5, 0x3e9a209a, v2 +; GFX1100-GISEL-NEXT: v_mul_f32_e32 v4, 0x3e9a209a, v1 ; GFX1100-GISEL-NEXT: v_fma_f32 v7, 0x3e9a209a, v1, -v4 -; GFX1100-GISEL-NEXT: v_fmac_f32_e32 v6, 0x3284fbcf, v0 -; GFX1100-GISEL-NEXT: v_fma_f32 v8, 0x3e9a209a, v2, -v5 -; GFX1100-GISEL-NEXT: s_delay_alu instid0(VALU_DEP_3) | instskip(NEXT) | instid1(VALU_DEP_3) +; GFX1100-GISEL-NEXT: s_delay_alu instid0(VALU_DEP_1) | instskip(NEXT) | instid1(VALU_DEP_1) ; GFX1100-GISEL-NEXT: v_fmac_f32_e32 v7, 0x3284fbcf, v1 -; GFX1100-GISEL-NEXT: v_add_f32_e32 v3, v3, v6 -; GFX1100-GISEL-NEXT: v_cndmask_b32_e64 v6, 0, 0x411a209b, s7 -; GFX1100-GISEL-NEXT: s_delay_alu instid0(VALU_DEP_3) | instskip(NEXT) | instid1(VALU_DEP_3) ; GFX1100-GISEL-NEXT: v_add_f32_e32 v4, v4, v7 ; GFX1100-GISEL-NEXT: v_cndmask_b32_e32 v0, v0, v3, vcc_lo ; GFX1100-GISEL-NEXT: v_cmp_gt_f32_e64 vcc_lo, 0x7f800000, |v1| -; GFX1100-GISEL-NEXT: v_dual_fmac_f32 v8, 0x3284fbcf, v2 :: v_dual_mov_b32 v3, 0 +; GFX1100-GISEL-NEXT: v_mov_b32_e32 v3, 0 ; GFX1100-GISEL-NEXT: s_delay_alu instid0(VALU_DEP_4) | instskip(SKIP_1) | instid1(VALU_DEP_2) ; GFX1100-GISEL-NEXT: v_cndmask_b32_e32 v1, v1, v4, vcc_lo ; GFX1100-GISEL-NEXT: v_cmp_gt_f32_e64 vcc_lo, 0x7f800000, |v2| -; GFX1100-GISEL-NEXT: v_sub_f32_e32 v1, v1, v10 -; GFX1100-GISEL-NEXT: s_delay_alu instid0(VALU_DEP_4) | instskip(NEXT) | instid1(VALU_DEP_1) -; GFX1100-GISEL-NEXT: v_dual_add_f32 v5, v5, v8 :: v_dual_sub_f32 v0, v0, v9 +; GFX1100-GISEL-NEXT: v_dual_sub_f32 v0, v0, v9 :: v_dual_sub_f32 v1, v1, v10 ; GFX1100-GISEL-NEXT: v_cndmask_b32_e32 v2, v2, v5, vcc_lo ; GFX1100-GISEL-NEXT: s_delay_alu 
instid0(VALU_DEP_1) ; GFX1100-GISEL-NEXT: v_sub_f32_e32 v2, v2, v6 @@ -1433,62 +1456,65 @@ define amdgpu_kernel void @s_log10_v4f32(ptr addrspace(1) %out, <4 x float> %in) ; SI-GISEL-NEXT: s_load_dwordx4 s[8:11], s[4:5], 0xd ; SI-GISEL-NEXT: s_load_dwordx2 s[4:5], s[4:5], 0x9 ; SI-GISEL-NEXT: v_mov_b32_e32 v2, 0x800000 -; SI-GISEL-NEXT: v_mov_b32_e32 v3, 0x4f800000 -; SI-GISEL-NEXT: v_mov_b32_e32 v4, 0x3e9a209a +; SI-GISEL-NEXT: v_mov_b32_e32 v3, 0x3e9a209a +; SI-GISEL-NEXT: v_mov_b32_e32 v4, 0x3284fbcf ; SI-GISEL-NEXT: s_waitcnt lgkmcnt(0) ; SI-GISEL-NEXT: v_cmp_lt_f32_e32 vcc, s8, v2 -; SI-GISEL-NEXT: v_cndmask_b32_e32 v0, 1.0, v3, vcc -; SI-GISEL-NEXT: v_mul_f32_e32 v0, s8, v0 +; SI-GISEL-NEXT: v_cndmask_b32_e64 v0, 0, 1, vcc +; SI-GISEL-NEXT: v_lshlrev_b32_e32 v0, 5, v0 +; SI-GISEL-NEXT: v_ldexp_f32_e32 v0, s8, v0 ; SI-GISEL-NEXT: v_log_f32_e32 v0, v0 -; SI-GISEL-NEXT: v_mov_b32_e32 v5, 0x3284fbcf -; SI-GISEL-NEXT: v_mov_b32_e32 v6, 0x7f800000 +; SI-GISEL-NEXT: v_mov_b32_e32 v5, 0x7f800000 ; SI-GISEL-NEXT: s_mov_b32 s6, -1 +; SI-GISEL-NEXT: s_mov_b32 s7, 0xf000 ; SI-GISEL-NEXT: v_mul_f32_e32 v1, 0x3e9a209a, v0 -; SI-GISEL-NEXT: v_fma_f32 v7, v0, v4, -v1 -; SI-GISEL-NEXT: v_fma_f32 v7, v0, v5, v7 -; SI-GISEL-NEXT: v_add_f32_e32 v1, v1, v7 -; SI-GISEL-NEXT: v_cmp_lt_f32_e64 s[0:1], |v0|, v6 +; SI-GISEL-NEXT: v_fma_f32 v6, v0, v3, -v1 +; SI-GISEL-NEXT: v_fma_f32 v6, v0, v4, v6 +; SI-GISEL-NEXT: v_add_f32_e32 v1, v1, v6 +; SI-GISEL-NEXT: v_cmp_lt_f32_e64 s[0:1], |v0|, v5 ; SI-GISEL-NEXT: v_cndmask_b32_e64 v0, v0, v1, s[0:1] ; SI-GISEL-NEXT: v_cmp_lt_f32_e64 s[0:1], s9, v2 -; SI-GISEL-NEXT: v_cndmask_b32_e64 v1, 1.0, v3, s[0:1] -; SI-GISEL-NEXT: v_mul_f32_e32 v1, s9, v1 +; SI-GISEL-NEXT: v_cndmask_b32_e64 v1, 0, 1, s[0:1] +; SI-GISEL-NEXT: v_lshlrev_b32_e32 v1, 5, v1 +; SI-GISEL-NEXT: v_ldexp_f32_e32 v1, s9, v1 ; SI-GISEL-NEXT: v_log_f32_e32 v1, v1 -; SI-GISEL-NEXT: v_mov_b32_e32 v7, 0x411a209b -; SI-GISEL-NEXT: v_cndmask_b32_e32 v8, 0, v7, vcc -; SI-GISEL-NEXT: 
v_sub_f32_e32 v0, v0, v8 -; SI-GISEL-NEXT: v_mul_f32_e32 v8, 0x3e9a209a, v1 -; SI-GISEL-NEXT: v_fma_f32 v9, v1, v4, -v8 -; SI-GISEL-NEXT: v_fma_f32 v9, v1, v5, v9 +; SI-GISEL-NEXT: v_mov_b32_e32 v6, 0x411a209b +; SI-GISEL-NEXT: v_cndmask_b32_e32 v7, 0, v6, vcc +; SI-GISEL-NEXT: v_sub_f32_e32 v0, v0, v7 +; SI-GISEL-NEXT: v_mul_f32_e32 v7, 0x3e9a209a, v1 +; SI-GISEL-NEXT: v_fma_f32 v8, v1, v3, -v7 +; SI-GISEL-NEXT: v_fma_f32 v8, v1, v4, v8 ; SI-GISEL-NEXT: v_cmp_lt_f32_e32 vcc, s10, v2 -; SI-GISEL-NEXT: v_add_f32_e32 v8, v8, v9 -; SI-GISEL-NEXT: v_cndmask_b32_e32 v9, 1.0, v3, vcc -; SI-GISEL-NEXT: v_mul_f32_e32 v9, s10, v9 -; SI-GISEL-NEXT: v_log_f32_e32 v9, v9 -; SI-GISEL-NEXT: v_cmp_lt_f32_e64 s[2:3], |v1|, v6 -; SI-GISEL-NEXT: v_cndmask_b32_e64 v1, v1, v8, s[2:3] -; SI-GISEL-NEXT: v_cndmask_b32_e64 v8, 0, v7, s[0:1] +; SI-GISEL-NEXT: v_add_f32_e32 v7, v7, v8 +; SI-GISEL-NEXT: v_cndmask_b32_e64 v8, 0, 1, vcc +; SI-GISEL-NEXT: v_lshlrev_b32_e32 v8, 5, v8 +; SI-GISEL-NEXT: v_ldexp_f32_e32 v8, s10, v8 +; SI-GISEL-NEXT: v_log_f32_e32 v8, v8 +; SI-GISEL-NEXT: v_cmp_lt_f32_e64 s[2:3], |v1|, v5 +; SI-GISEL-NEXT: v_cndmask_b32_e64 v1, v1, v7, s[2:3] +; SI-GISEL-NEXT: v_cndmask_b32_e64 v7, 0, v6, s[0:1] ; SI-GISEL-NEXT: v_cmp_lt_f32_e64 s[0:1], s11, v2 -; SI-GISEL-NEXT: v_cndmask_b32_e64 v2, 1.0, v3, s[0:1] -; SI-GISEL-NEXT: v_sub_f32_e32 v1, v1, v8 -; SI-GISEL-NEXT: v_mul_f32_e32 v8, 0x3e9a209a, v9 -; SI-GISEL-NEXT: v_mul_f32_e32 v2, s11, v2 -; SI-GISEL-NEXT: v_fma_f32 v10, v9, v4, -v8 -; SI-GISEL-NEXT: v_log_f32_e32 v3, v2 -; SI-GISEL-NEXT: v_fma_f32 v10, v9, v5, v10 -; SI-GISEL-NEXT: v_add_f32_e32 v8, v8, v10 -; SI-GISEL-NEXT: v_cmp_lt_f32_e64 s[2:3], |v9|, v6 -; SI-GISEL-NEXT: v_cndmask_b32_e64 v2, v9, v8, s[2:3] -; SI-GISEL-NEXT: v_cndmask_b32_e32 v8, 0, v7, vcc -; SI-GISEL-NEXT: v_sub_f32_e32 v2, v2, v8 -; SI-GISEL-NEXT: v_mul_f32_e32 v8, 0x3e9a209a, v3 -; SI-GISEL-NEXT: v_fma_f32 v4, v3, v4, -v8 -; SI-GISEL-NEXT: v_fma_f32 v4, v3, v5, v4 -; SI-GISEL-NEXT: 
v_add_f32_e32 v4, v8, v4 -; SI-GISEL-NEXT: v_cmp_lt_f32_e64 vcc, |v3|, v6 -; SI-GISEL-NEXT: v_cndmask_b32_e32 v3, v3, v4, vcc -; SI-GISEL-NEXT: v_cndmask_b32_e64 v4, 0, v7, s[0:1] +; SI-GISEL-NEXT: v_sub_f32_e32 v1, v1, v7 +; SI-GISEL-NEXT: v_mul_f32_e32 v7, 0x3e9a209a, v8 +; SI-GISEL-NEXT: v_cndmask_b32_e64 v2, 0, 1, s[0:1] +; SI-GISEL-NEXT: v_fma_f32 v9, v8, v3, -v7 +; SI-GISEL-NEXT: v_lshlrev_b32_e32 v2, 5, v2 +; SI-GISEL-NEXT: v_fma_f32 v9, v8, v4, v9 +; SI-GISEL-NEXT: v_ldexp_f32_e32 v2, s11, v2 +; SI-GISEL-NEXT: v_add_f32_e32 v7, v7, v9 +; SI-GISEL-NEXT: v_log_f32_e32 v9, v2 +; SI-GISEL-NEXT: v_cmp_lt_f32_e64 s[2:3], |v8|, v5 +; SI-GISEL-NEXT: v_cndmask_b32_e64 v2, v8, v7, s[2:3] +; SI-GISEL-NEXT: v_cndmask_b32_e32 v7, 0, v6, vcc +; SI-GISEL-NEXT: v_sub_f32_e32 v2, v2, v7 +; SI-GISEL-NEXT: v_mul_f32_e32 v7, 0x3e9a209a, v9 +; SI-GISEL-NEXT: v_fma_f32 v3, v9, v3, -v7 +; SI-GISEL-NEXT: v_fma_f32 v3, v9, v4, v3 +; SI-GISEL-NEXT: v_add_f32_e32 v3, v7, v3 +; SI-GISEL-NEXT: v_cmp_lt_f32_e64 vcc, |v9|, v5 +; SI-GISEL-NEXT: v_cndmask_b32_e32 v3, v9, v3, vcc +; SI-GISEL-NEXT: v_cndmask_b32_e64 v4, 0, v6, s[0:1] ; SI-GISEL-NEXT: v_sub_f32_e32 v3, v3, v4 -; SI-GISEL-NEXT: s_mov_b32 s7, 0xf000 ; SI-GISEL-NEXT: buffer_store_dwordx4 v[0:3], off, s[4:7], 0 ; SI-GISEL-NEXT: s_endpgm ; @@ -1581,12 +1607,13 @@ define amdgpu_kernel void @s_log10_v4f32(ptr addrspace(1) %out, <4 x float> %in) ; VI-GISEL: ; %bb.0: ; VI-GISEL-NEXT: s_load_dwordx4 s[8:11], s[4:5], 0x34 ; VI-GISEL-NEXT: v_mov_b32_e32 v2, 0x800000 -; VI-GISEL-NEXT: v_mov_b32_e32 v3, 0x4f800000 +; VI-GISEL-NEXT: v_mov_b32_e32 v3, 0x7f800000 ; VI-GISEL-NEXT: s_load_dwordx2 s[4:5], s[4:5], 0x24 ; VI-GISEL-NEXT: s_waitcnt lgkmcnt(0) ; VI-GISEL-NEXT: v_cmp_lt_f32_e32 vcc, s8, v2 -; VI-GISEL-NEXT: v_cndmask_b32_e32 v0, 1.0, v3, vcc -; VI-GISEL-NEXT: v_mul_f32_e32 v0, s8, v0 +; VI-GISEL-NEXT: v_cndmask_b32_e64 v0, 0, 1, vcc +; VI-GISEL-NEXT: v_lshlrev_b32_e32 v0, 5, v0 +; VI-GISEL-NEXT: v_ldexp_f32 v0, s8, v0 ; VI-GISEL-NEXT: 
v_log_f32_e32 v0, v0 ; VI-GISEL-NEXT: v_and_b32_e32 v1, 0xfffff000, v0 ; VI-GISEL-NEXT: v_sub_f32_e32 v4, v0, v1 @@ -1597,62 +1624,64 @@ define amdgpu_kernel void @s_log10_v4f32(ptr addrspace(1) %out, <4 x float> %in) ; VI-GISEL-NEXT: v_mul_f32_e32 v1, 0x3e9a2000, v1 ; VI-GISEL-NEXT: v_add_f32_e32 v4, v4, v5 ; VI-GISEL-NEXT: v_add_f32_e32 v1, v1, v4 -; VI-GISEL-NEXT: v_mov_b32_e32 v4, 0x7f800000 -; VI-GISEL-NEXT: v_cmp_lt_f32_e64 s[0:1], |v0|, v4 +; VI-GISEL-NEXT: v_cmp_lt_f32_e64 s[0:1], |v0|, v3 ; VI-GISEL-NEXT: v_cndmask_b32_e64 v0, v0, v1, s[0:1] ; VI-GISEL-NEXT: v_cmp_lt_f32_e64 s[0:1], s9, v2 -; VI-GISEL-NEXT: v_cndmask_b32_e64 v1, 1.0, v3, s[0:1] -; VI-GISEL-NEXT: v_mul_f32_e32 v1, s9, v1 +; VI-GISEL-NEXT: v_cndmask_b32_e64 v1, 0, 1, s[0:1] +; VI-GISEL-NEXT: v_lshlrev_b32_e32 v1, 5, v1 +; VI-GISEL-NEXT: v_ldexp_f32 v1, s9, v1 ; VI-GISEL-NEXT: v_log_f32_e32 v1, v1 -; VI-GISEL-NEXT: v_mov_b32_e32 v5, 0x411a209b -; VI-GISEL-NEXT: v_cndmask_b32_e32 v6, 0, v5, vcc -; VI-GISEL-NEXT: v_sub_f32_e32 v0, v0, v6 -; VI-GISEL-NEXT: v_and_b32_e32 v6, 0xfffff000, v1 -; VI-GISEL-NEXT: v_sub_f32_e32 v7, v1, v6 -; VI-GISEL-NEXT: v_mul_f32_e32 v8, 0x369a84fb, v7 -; VI-GISEL-NEXT: v_mul_f32_e32 v9, 0x369a84fb, v6 -; VI-GISEL-NEXT: v_add_f32_e32 v8, v9, v8 -; VI-GISEL-NEXT: v_mul_f32_e32 v7, 0x3e9a2000, v7 -; VI-GISEL-NEXT: v_add_f32_e32 v7, v7, v8 +; VI-GISEL-NEXT: v_mov_b32_e32 v4, 0x411a209b +; VI-GISEL-NEXT: v_cndmask_b32_e32 v5, 0, v4, vcc +; VI-GISEL-NEXT: v_sub_f32_e32 v0, v0, v5 +; VI-GISEL-NEXT: v_and_b32_e32 v5, 0xfffff000, v1 +; VI-GISEL-NEXT: v_sub_f32_e32 v6, v1, v5 +; VI-GISEL-NEXT: v_mul_f32_e32 v7, 0x369a84fb, v6 +; VI-GISEL-NEXT: v_mul_f32_e32 v8, 0x369a84fb, v5 +; VI-GISEL-NEXT: v_add_f32_e32 v7, v8, v7 ; VI-GISEL-NEXT: v_mul_f32_e32 v6, 0x3e9a2000, v6 -; VI-GISEL-NEXT: v_cmp_lt_f32_e32 vcc, s10, v2 ; VI-GISEL-NEXT: v_add_f32_e32 v6, v6, v7 -; VI-GISEL-NEXT: v_cndmask_b32_e32 v7, 1.0, v3, vcc -; VI-GISEL-NEXT: v_mul_f32_e32 v7, s10, v7 -; VI-GISEL-NEXT: 
v_log_f32_e32 v7, v7 -; VI-GISEL-NEXT: v_cmp_lt_f32_e64 s[2:3], |v1|, v4 -; VI-GISEL-NEXT: v_cndmask_b32_e64 v1, v1, v6, s[2:3] -; VI-GISEL-NEXT: v_cndmask_b32_e64 v6, 0, v5, s[0:1] -; VI-GISEL-NEXT: v_sub_f32_e32 v1, v1, v6 -; VI-GISEL-NEXT: v_and_b32_e32 v6, 0xfffff000, v7 +; VI-GISEL-NEXT: v_mul_f32_e32 v5, 0x3e9a2000, v5 +; VI-GISEL-NEXT: v_cmp_lt_f32_e32 vcc, s10, v2 +; VI-GISEL-NEXT: v_add_f32_e32 v5, v5, v6 +; VI-GISEL-NEXT: v_cndmask_b32_e64 v6, 0, 1, vcc +; VI-GISEL-NEXT: v_lshlrev_b32_e32 v6, 5, v6 +; VI-GISEL-NEXT: v_ldexp_f32 v6, s10, v6 +; VI-GISEL-NEXT: v_log_f32_e32 v6, v6 +; VI-GISEL-NEXT: v_cmp_lt_f32_e64 s[2:3], |v1|, v3 +; VI-GISEL-NEXT: v_cndmask_b32_e64 v1, v1, v5, s[2:3] +; VI-GISEL-NEXT: v_cndmask_b32_e64 v5, 0, v4, s[0:1] +; VI-GISEL-NEXT: v_sub_f32_e32 v1, v1, v5 +; VI-GISEL-NEXT: v_and_b32_e32 v5, 0xfffff000, v6 +; VI-GISEL-NEXT: v_sub_f32_e32 v7, v6, v5 ; VI-GISEL-NEXT: v_cmp_lt_f32_e64 s[0:1], s11, v2 -; VI-GISEL-NEXT: v_sub_f32_e32 v8, v7, v6 -; VI-GISEL-NEXT: v_cndmask_b32_e64 v2, 1.0, v3, s[0:1] -; VI-GISEL-NEXT: v_mul_f32_e32 v9, 0x369a84fb, v8 -; VI-GISEL-NEXT: v_mul_f32_e32 v10, 0x369a84fb, v6 -; VI-GISEL-NEXT: v_mul_f32_e32 v2, s11, v2 -; VI-GISEL-NEXT: v_add_f32_e32 v9, v10, v9 -; VI-GISEL-NEXT: v_mul_f32_e32 v8, 0x3e9a2000, v8 -; VI-GISEL-NEXT: v_log_f32_e32 v3, v2 -; VI-GISEL-NEXT: v_add_f32_e32 v8, v8, v9 -; VI-GISEL-NEXT: v_mul_f32_e32 v6, 0x3e9a2000, v6 -; VI-GISEL-NEXT: v_add_f32_e32 v6, v6, v8 -; VI-GISEL-NEXT: v_cmp_lt_f32_e64 s[2:3], |v7|, v4 -; VI-GISEL-NEXT: v_cndmask_b32_e64 v2, v7, v6, s[2:3] -; VI-GISEL-NEXT: v_cndmask_b32_e32 v6, 0, v5, vcc -; VI-GISEL-NEXT: v_sub_f32_e32 v2, v2, v6 -; VI-GISEL-NEXT: v_and_b32_e32 v6, 0xfffff000, v3 -; VI-GISEL-NEXT: v_sub_f32_e32 v7, v3, v6 ; VI-GISEL-NEXT: v_mul_f32_e32 v8, 0x369a84fb, v7 -; VI-GISEL-NEXT: v_mul_f32_e32 v9, 0x369a84fb, v6 +; VI-GISEL-NEXT: v_mul_f32_e32 v9, 0x369a84fb, v5 +; VI-GISEL-NEXT: v_cndmask_b32_e64 v2, 0, 1, s[0:1] ; VI-GISEL-NEXT: v_add_f32_e32 v8, v9, 
v8 ; VI-GISEL-NEXT: v_mul_f32_e32 v7, 0x3e9a2000, v7 +; VI-GISEL-NEXT: v_lshlrev_b32_e32 v2, 5, v2 ; VI-GISEL-NEXT: v_add_f32_e32 v7, v7, v8 +; VI-GISEL-NEXT: v_mul_f32_e32 v5, 0x3e9a2000, v5 +; VI-GISEL-NEXT: v_ldexp_f32 v2, s11, v2 +; VI-GISEL-NEXT: v_add_f32_e32 v5, v5, v7 +; VI-GISEL-NEXT: v_log_f32_e32 v7, v2 +; VI-GISEL-NEXT: v_cmp_lt_f32_e64 s[2:3], |v6|, v3 +; VI-GISEL-NEXT: v_cndmask_b32_e64 v2, v6, v5, s[2:3] +; VI-GISEL-NEXT: v_cndmask_b32_e32 v5, 0, v4, vcc +; VI-GISEL-NEXT: v_sub_f32_e32 v2, v2, v5 +; VI-GISEL-NEXT: v_and_b32_e32 v5, 0xfffff000, v7 +; VI-GISEL-NEXT: v_sub_f32_e32 v6, v7, v5 +; VI-GISEL-NEXT: v_mul_f32_e32 v8, 0x369a84fb, v6 +; VI-GISEL-NEXT: v_mul_f32_e32 v9, 0x369a84fb, v5 +; VI-GISEL-NEXT: v_add_f32_e32 v8, v9, v8 ; VI-GISEL-NEXT: v_mul_f32_e32 v6, 0x3e9a2000, v6 -; VI-GISEL-NEXT: v_add_f32_e32 v6, v6, v7 -; VI-GISEL-NEXT: v_cmp_lt_f32_e64 vcc, |v3|, v4 -; VI-GISEL-NEXT: v_cndmask_b32_e32 v3, v3, v6, vcc -; VI-GISEL-NEXT: v_cndmask_b32_e64 v4, 0, v5, s[0:1] +; VI-GISEL-NEXT: v_add_f32_e32 v6, v6, v8 +; VI-GISEL-NEXT: v_mul_f32_e32 v5, 0x3e9a2000, v5 +; VI-GISEL-NEXT: v_add_f32_e32 v5, v5, v6 +; VI-GISEL-NEXT: v_cmp_lt_f32_e64 vcc, |v7|, v3 +; VI-GISEL-NEXT: v_cndmask_b32_e32 v3, v7, v5, vcc +; VI-GISEL-NEXT: v_cndmask_b32_e64 v4, 0, v4, s[0:1] ; VI-GISEL-NEXT: v_sub_f32_e32 v3, v3, v4 ; VI-GISEL-NEXT: v_mov_b32_e32 v4, s4 ; VI-GISEL-NEXT: v_mov_b32_e32 v5, s5 @@ -1730,61 +1759,64 @@ define amdgpu_kernel void @s_log10_v4f32(ptr addrspace(1) %out, <4 x float> %in) ; GFX900-GISEL-NEXT: s_load_dwordx4 s[8:11], s[4:5], 0x34 ; GFX900-GISEL-NEXT: s_load_dwordx2 s[6:7], s[4:5], 0x24 ; GFX900-GISEL-NEXT: v_mov_b32_e32 v2, 0x800000 -; GFX900-GISEL-NEXT: v_mov_b32_e32 v3, 0x4f800000 -; GFX900-GISEL-NEXT: v_mov_b32_e32 v4, 0x3e9a209a +; GFX900-GISEL-NEXT: v_mov_b32_e32 v3, 0x3e9a209a +; GFX900-GISEL-NEXT: v_mov_b32_e32 v5, 0x3284fbcf ; GFX900-GISEL-NEXT: s_waitcnt lgkmcnt(0) ; GFX900-GISEL-NEXT: v_cmp_lt_f32_e32 vcc, s8, v2 -; GFX900-GISEL-NEXT: 
v_cndmask_b32_e32 v0, 1.0, v3, vcc -; GFX900-GISEL-NEXT: v_mul_f32_e32 v0, s8, v0 +; GFX900-GISEL-NEXT: v_cndmask_b32_e64 v0, 0, 1, vcc +; GFX900-GISEL-NEXT: v_lshlrev_b32_e32 v0, 5, v0 +; GFX900-GISEL-NEXT: v_ldexp_f32 v0, s8, v0 ; GFX900-GISEL-NEXT: v_log_f32_e32 v0, v0 -; GFX900-GISEL-NEXT: v_mov_b32_e32 v5, 0x3284fbcf ; GFX900-GISEL-NEXT: v_mov_b32_e32 v6, 0x7f800000 +; GFX900-GISEL-NEXT: v_mov_b32_e32 v4, 0 ; GFX900-GISEL-NEXT: v_mul_f32_e32 v1, 0x3e9a209a, v0 -; GFX900-GISEL-NEXT: v_fma_f32 v7, v0, v4, -v1 +; GFX900-GISEL-NEXT: v_fma_f32 v7, v0, v3, -v1 ; GFX900-GISEL-NEXT: v_fma_f32 v7, v0, v5, v7 ; GFX900-GISEL-NEXT: v_add_f32_e32 v1, v1, v7 ; GFX900-GISEL-NEXT: v_cmp_lt_f32_e64 s[0:1], |v0|, v6 ; GFX900-GISEL-NEXT: v_cndmask_b32_e64 v0, v0, v1, s[0:1] ; GFX900-GISEL-NEXT: v_cmp_lt_f32_e64 s[0:1], s9, v2 -; GFX900-GISEL-NEXT: v_cndmask_b32_e64 v1, 1.0, v3, s[0:1] -; GFX900-GISEL-NEXT: v_mul_f32_e32 v1, s9, v1 +; GFX900-GISEL-NEXT: v_cndmask_b32_e64 v1, 0, 1, s[0:1] +; GFX900-GISEL-NEXT: v_lshlrev_b32_e32 v1, 5, v1 +; GFX900-GISEL-NEXT: v_ldexp_f32 v1, s9, v1 ; GFX900-GISEL-NEXT: v_log_f32_e32 v1, v1 ; GFX900-GISEL-NEXT: v_mov_b32_e32 v7, 0x411a209b ; GFX900-GISEL-NEXT: v_cndmask_b32_e32 v8, 0, v7, vcc ; GFX900-GISEL-NEXT: v_sub_f32_e32 v0, v0, v8 ; GFX900-GISEL-NEXT: v_mul_f32_e32 v8, 0x3e9a209a, v1 -; GFX900-GISEL-NEXT: v_fma_f32 v9, v1, v4, -v8 +; GFX900-GISEL-NEXT: v_fma_f32 v9, v1, v3, -v8 ; GFX900-GISEL-NEXT: v_fma_f32 v9, v1, v5, v9 ; GFX900-GISEL-NEXT: v_cmp_lt_f32_e32 vcc, s10, v2 ; GFX900-GISEL-NEXT: v_add_f32_e32 v8, v8, v9 -; GFX900-GISEL-NEXT: v_cndmask_b32_e32 v9, 1.0, v3, vcc -; GFX900-GISEL-NEXT: v_mul_f32_e32 v9, s10, v9 +; GFX900-GISEL-NEXT: v_cndmask_b32_e64 v9, 0, 1, vcc +; GFX900-GISEL-NEXT: v_lshlrev_b32_e32 v9, 5, v9 +; GFX900-GISEL-NEXT: v_ldexp_f32 v9, s10, v9 ; GFX900-GISEL-NEXT: v_log_f32_e32 v9, v9 ; GFX900-GISEL-NEXT: v_cmp_lt_f32_e64 s[2:3], |v1|, v6 ; GFX900-GISEL-NEXT: v_cndmask_b32_e64 v1, v1, v8, s[2:3] ; GFX900-GISEL-NEXT: 
v_cndmask_b32_e64 v8, 0, v7, s[0:1] ; GFX900-GISEL-NEXT: v_cmp_lt_f32_e64 s[0:1], s11, v2 -; GFX900-GISEL-NEXT: v_cndmask_b32_e64 v2, 1.0, v3, s[0:1] ; GFX900-GISEL-NEXT: v_sub_f32_e32 v1, v1, v8 ; GFX900-GISEL-NEXT: v_mul_f32_e32 v8, 0x3e9a209a, v9 -; GFX900-GISEL-NEXT: v_mul_f32_e32 v2, s11, v2 -; GFX900-GISEL-NEXT: v_fma_f32 v10, v9, v4, -v8 -; GFX900-GISEL-NEXT: v_log_f32_e32 v3, v2 +; GFX900-GISEL-NEXT: v_cndmask_b32_e64 v2, 0, 1, s[0:1] +; GFX900-GISEL-NEXT: v_fma_f32 v10, v9, v3, -v8 +; GFX900-GISEL-NEXT: v_lshlrev_b32_e32 v2, 5, v2 ; GFX900-GISEL-NEXT: v_fma_f32 v10, v9, v5, v10 +; GFX900-GISEL-NEXT: v_ldexp_f32 v2, s11, v2 ; GFX900-GISEL-NEXT: v_add_f32_e32 v8, v8, v10 +; GFX900-GISEL-NEXT: v_log_f32_e32 v10, v2 ; GFX900-GISEL-NEXT: v_cmp_lt_f32_e64 s[2:3], |v9|, v6 ; GFX900-GISEL-NEXT: v_cndmask_b32_e64 v2, v9, v8, s[2:3] ; GFX900-GISEL-NEXT: v_cndmask_b32_e32 v8, 0, v7, vcc ; GFX900-GISEL-NEXT: v_sub_f32_e32 v2, v2, v8 -; GFX900-GISEL-NEXT: v_mul_f32_e32 v8, 0x3e9a209a, v3 -; GFX900-GISEL-NEXT: v_fma_f32 v4, v3, v4, -v8 -; GFX900-GISEL-NEXT: v_fma_f32 v4, v3, v5, v4 -; GFX900-GISEL-NEXT: v_add_f32_e32 v4, v8, v4 -; GFX900-GISEL-NEXT: v_cmp_lt_f32_e64 vcc, |v3|, v6 -; GFX900-GISEL-NEXT: v_cndmask_b32_e32 v3, v3, v4, vcc -; GFX900-GISEL-NEXT: v_cndmask_b32_e64 v4, 0, v7, s[0:1] -; GFX900-GISEL-NEXT: v_sub_f32_e32 v3, v3, v4 -; GFX900-GISEL-NEXT: v_mov_b32_e32 v4, 0 +; GFX900-GISEL-NEXT: v_mul_f32_e32 v8, 0x3e9a209a, v10 +; GFX900-GISEL-NEXT: v_fma_f32 v3, v10, v3, -v8 +; GFX900-GISEL-NEXT: v_fma_f32 v3, v10, v5, v3 +; GFX900-GISEL-NEXT: v_add_f32_e32 v3, v8, v3 +; GFX900-GISEL-NEXT: v_cmp_lt_f32_e64 vcc, |v10|, v6 +; GFX900-GISEL-NEXT: v_cndmask_b32_e32 v3, v10, v3, vcc +; GFX900-GISEL-NEXT: v_cndmask_b32_e64 v5, 0, v7, s[0:1] +; GFX900-GISEL-NEXT: v_sub_f32_e32 v3, v3, v5 ; GFX900-GISEL-NEXT: global_store_dwordx4 v4, v[0:3], s[6:7] ; GFX900-GISEL-NEXT: s_endpgm ; @@ -1860,60 +1892,67 @@ define amdgpu_kernel void @s_log10_v4f32(ptr addrspace(1) %out, <4 x 
float> %in) ; GFX1100-GISEL: ; %bb.0: ; GFX1100-GISEL-NEXT: s_load_b128 s[0:3], s[4:5], 0x34 ; GFX1100-GISEL-NEXT: s_waitcnt lgkmcnt(0) -; GFX1100-GISEL-NEXT: v_cmp_gt_f32_e64 s6, 0x800000, s0 -; GFX1100-GISEL-NEXT: v_cmp_gt_f32_e64 s7, 0x800000, s1 ; GFX1100-GISEL-NEXT: v_cmp_gt_f32_e64 s8, 0x800000, s2 ; GFX1100-GISEL-NEXT: v_cmp_gt_f32_e64 s9, 0x800000, s3 +; GFX1100-GISEL-NEXT: v_cmp_gt_f32_e64 s6, 0x800000, s0 +; GFX1100-GISEL-NEXT: v_cmp_gt_f32_e64 s7, 0x800000, s1 ; GFX1100-GISEL-NEXT: s_delay_alu instid0(VALU_DEP_4) | instskip(NEXT) | instid1(VALU_DEP_4) -; GFX1100-GISEL-NEXT: v_cndmask_b32_e64 v0, 1.0, 0x4f800000, s6 -; GFX1100-GISEL-NEXT: v_cndmask_b32_e64 v1, 1.0, 0x4f800000, s7 +; GFX1100-GISEL-NEXT: v_cndmask_b32_e64 v2, 0, 1, s8 +; GFX1100-GISEL-NEXT: v_cndmask_b32_e64 v3, 0, 1, s9 ; GFX1100-GISEL-NEXT: s_delay_alu instid0(VALU_DEP_4) | instskip(NEXT) | instid1(VALU_DEP_4) -; GFX1100-GISEL-NEXT: v_cndmask_b32_e64 v2, 1.0, 0x4f800000, s8 -; GFX1100-GISEL-NEXT: v_cndmask_b32_e64 v3, 1.0, 0x4f800000, s9 +; GFX1100-GISEL-NEXT: v_cndmask_b32_e64 v0, 0, 1, s6 +; GFX1100-GISEL-NEXT: v_cndmask_b32_e64 v1, 0, 1, s7 ; GFX1100-GISEL-NEXT: v_cndmask_b32_e64 v4, 0, 0x411a209b, s6 -; GFX1100-GISEL-NEXT: s_delay_alu instid0(VALU_DEP_4) | instskip(NEXT) | instid1(VALU_DEP_3) -; GFX1100-GISEL-NEXT: v_dual_mul_f32 v0, s0, v0 :: v_dual_mul_f32 v1, s1, v1 -; GFX1100-GISEL-NEXT: v_dual_mul_f32 v2, s2, v2 :: v_dual_mul_f32 v3, s3, v3 -; GFX1100-GISEL-NEXT: s_load_b64 s[0:1], s[4:5], 0x24 -; GFX1100-GISEL-NEXT: s_delay_alu instid0(VALU_DEP_2) | instskip(NEXT) | instid1(VALU_DEP_2) -; GFX1100-GISEL-NEXT: v_log_f32_e32 v0, v0 -; GFX1100-GISEL-NEXT: v_log_f32_e32 v1, v1 -; GFX1100-GISEL-NEXT: s_delay_alu instid0(VALU_DEP_1) | instskip(SKIP_4) | instid1(TRANS32_DEP_3) -; GFX1100-GISEL-NEXT: v_log_f32_e32 v2, v2 -; GFX1100-GISEL-NEXT: v_log_f32_e32 v3, v3 +; GFX1100-GISEL-NEXT: v_lshlrev_b32_e32 v2, 5, v2 +; GFX1100-GISEL-NEXT: v_lshlrev_b32_e32 v3, 5, v3 ; GFX1100-GISEL-NEXT: 
v_cndmask_b32_e64 v9, 0, 0x411a209b, s7 ; GFX1100-GISEL-NEXT: v_cndmask_b32_e64 v14, 0, 0x411a209b, s8 ; GFX1100-GISEL-NEXT: v_cndmask_b32_e64 v15, 0, 0x411a209b, s9 -; GFX1100-GISEL-NEXT: v_dual_mul_f32 v5, 0x3e9a209a, v0 :: v_dual_mul_f32 v6, 0x3e9a209a, v1 +; GFX1100-GISEL-NEXT: v_ldexp_f32 v2, s2, v2 +; GFX1100-GISEL-NEXT: v_ldexp_f32 v3, s3, v3 +; GFX1100-GISEL-NEXT: s_delay_alu instid0(VALU_DEP_2) | instskip(SKIP_1) | instid1(VALU_DEP_2) +; GFX1100-GISEL-NEXT: v_log_f32_e32 v2, v2 +; GFX1100-GISEL-NEXT: v_lshlrev_b32_e32 v0, 5, v0 +; GFX1100-GISEL-NEXT: v_log_f32_e32 v3, v3 +; GFX1100-GISEL-NEXT: v_lshlrev_b32_e32 v1, 5, v1 +; GFX1100-GISEL-NEXT: s_waitcnt_depctr 0xfff +; GFX1100-GISEL-NEXT: v_mul_f32_e32 v7, 0x3e9a209a, v2 +; GFX1100-GISEL-NEXT: v_ldexp_f32 v0, s0, v0 +; GFX1100-GISEL-NEXT: v_mul_f32_e32 v8, 0x3e9a209a, v3 +; GFX1100-GISEL-NEXT: v_ldexp_f32 v1, s1, v1 +; GFX1100-GISEL-NEXT: s_load_b64 s[0:1], s[4:5], 0x24 +; GFX1100-GISEL-NEXT: v_fma_f32 v12, 0x3e9a209a, v2, -v7 +; GFX1100-GISEL-NEXT: v_log_f32_e32 v0, v0 +; GFX1100-GISEL-NEXT: v_fma_f32 v13, 0x3e9a209a, v3, -v8 +; GFX1100-GISEL-NEXT: v_log_f32_e32 v1, v1 +; GFX1100-GISEL-NEXT: s_delay_alu instid0(VALU_DEP_1) | instskip(NEXT) | instid1(VALU_DEP_1) +; GFX1100-GISEL-NEXT: v_dual_fmac_f32 v12, 0x3284fbcf, v2 :: v_dual_fmac_f32 v13, 0x3284fbcf, v3 +; GFX1100-GISEL-NEXT: v_add_f32_e32 v7, v7, v12 ; GFX1100-GISEL-NEXT: s_waitcnt_depctr 0xfff -; GFX1100-GISEL-NEXT: v_dual_mul_f32 v7, 0x3e9a209a, v2 :: v_dual_mul_f32 v8, 0x3e9a209a, v3 +; GFX1100-GISEL-NEXT: v_dual_mul_f32 v5, 0x3e9a209a, v0 :: v_dual_add_f32 v8, v8, v13 +; GFX1100-GISEL-NEXT: v_mul_f32_e32 v6, 0x3e9a209a, v1 ; GFX1100-GISEL-NEXT: v_cmp_gt_f32_e64 vcc_lo, 0x7f800000, |v0| +; GFX1100-GISEL-NEXT: s_delay_alu instid0(VALU_DEP_3) | instskip(NEXT) | instid1(VALU_DEP_3) ; GFX1100-GISEL-NEXT: v_fma_f32 v10, 0x3e9a209a, v0, -v5 ; GFX1100-GISEL-NEXT: v_fma_f32 v11, 0x3e9a209a, v1, -v6 -; GFX1100-GISEL-NEXT: s_delay_alu instid0(VALU_DEP_4) | 
instskip(SKIP_1) | instid1(VALU_DEP_3) -; GFX1100-GISEL-NEXT: v_fma_f32 v12, 0x3e9a209a, v2, -v7 -; GFX1100-GISEL-NEXT: v_fma_f32 v13, 0x3e9a209a, v3, -v8 +; GFX1100-GISEL-NEXT: s_delay_alu instid0(VALU_DEP_1) | instskip(NEXT) | instid1(VALU_DEP_1) ; GFX1100-GISEL-NEXT: v_dual_fmac_f32 v10, 0x3284fbcf, v0 :: v_dual_fmac_f32 v11, 0x3284fbcf, v1 -; GFX1100-GISEL-NEXT: s_delay_alu instid0(VALU_DEP_2) | instskip(NEXT) | instid1(VALU_DEP_2) -; GFX1100-GISEL-NEXT: v_dual_fmac_f32 v12, 0x3284fbcf, v2 :: v_dual_fmac_f32 v13, 0x3284fbcf, v3 ; GFX1100-GISEL-NEXT: v_dual_add_f32 v5, v5, v10 :: v_dual_add_f32 v6, v6, v11 -; GFX1100-GISEL-NEXT: s_delay_alu instid0(VALU_DEP_2) | instskip(NEXT) | instid1(VALU_DEP_2) -; GFX1100-GISEL-NEXT: v_dual_add_f32 v7, v7, v12 :: v_dual_add_f32 v8, v8, v13 +; GFX1100-GISEL-NEXT: s_delay_alu instid0(VALU_DEP_1) | instskip(SKIP_1) | instid1(VALU_DEP_3) ; GFX1100-GISEL-NEXT: v_cndmask_b32_e32 v0, v0, v5, vcc_lo ; GFX1100-GISEL-NEXT: v_cmp_gt_f32_e64 vcc_lo, 0x7f800000, |v1| -; GFX1100-GISEL-NEXT: s_delay_alu instid0(VALU_DEP_4) | instskip(SKIP_1) | instid1(VALU_DEP_4) ; GFX1100-GISEL-NEXT: v_cndmask_b32_e32 v1, v1, v6, vcc_lo ; GFX1100-GISEL-NEXT: v_cmp_gt_f32_e64 vcc_lo, 0x7f800000, |v2| -; GFX1100-GISEL-NEXT: v_sub_f32_e32 v0, v0, v4 -; GFX1100-GISEL-NEXT: s_delay_alu instid0(VALU_DEP_3) | instskip(SKIP_2) | instid1(VALU_DEP_2) -; GFX1100-GISEL-NEXT: v_dual_mov_b32 v4, 0 :: v_dual_sub_f32 v1, v1, v9 +; GFX1100-GISEL-NEXT: s_delay_alu instid0(VALU_DEP_4) | instskip(SKIP_3) | instid1(VALU_DEP_3) +; GFX1100-GISEL-NEXT: v_dual_mov_b32 v5, 0 :: v_dual_sub_f32 v0, v0, v4 ; GFX1100-GISEL-NEXT: v_cndmask_b32_e32 v2, v2, v7, vcc_lo ; GFX1100-GISEL-NEXT: v_cmp_gt_f32_e64 vcc_lo, 0x7f800000, |v3| -; GFX1100-GISEL-NEXT: v_dual_cndmask_b32 v3, v3, v8 :: v_dual_sub_f32 v2, v2, v14 -; GFX1100-GISEL-NEXT: s_delay_alu instid0(VALU_DEP_1) +; GFX1100-GISEL-NEXT: v_cndmask_b32_e32 v3, v3, v8, vcc_lo +; GFX1100-GISEL-NEXT: v_dual_sub_f32 v1, v1, v9 :: 
v_dual_sub_f32 v2, v2, v14 +; GFX1100-GISEL-NEXT: s_delay_alu instid0(VALU_DEP_2) ; GFX1100-GISEL-NEXT: v_sub_f32_e32 v3, v3, v15 ; GFX1100-GISEL-NEXT: s_waitcnt lgkmcnt(0) -; GFX1100-GISEL-NEXT: global_store_b128 v4, v[0:3], s[0:1] +; GFX1100-GISEL-NEXT: global_store_b128 v5, v[0:3], s[0:1] ; GFX1100-GISEL-NEXT: s_endpgm ; ; R600-LABEL: s_log10_v4f32: @@ -2126,10 +2165,10 @@ define float @v_log10_f32(float %in) { ; SI-GISEL: ; %bb.0: ; SI-GISEL-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) ; SI-GISEL-NEXT: v_mov_b32_e32 v1, 0x800000 -; SI-GISEL-NEXT: v_mov_b32_e32 v2, 0x4f800000 ; SI-GISEL-NEXT: v_cmp_lt_f32_e32 vcc, v0, v1 -; SI-GISEL-NEXT: v_cndmask_b32_e32 v1, 1.0, v2, vcc -; SI-GISEL-NEXT: v_mul_f32_e32 v0, v0, v1 +; SI-GISEL-NEXT: v_cndmask_b32_e64 v1, 0, 1, vcc +; SI-GISEL-NEXT: v_lshlrev_b32_e32 v1, 5, v1 +; SI-GISEL-NEXT: v_ldexp_f32_e32 v0, v0, v1 ; SI-GISEL-NEXT: v_log_f32_e32 v0, v0 ; SI-GISEL-NEXT: v_mov_b32_e32 v1, 0x3e9a209a ; SI-GISEL-NEXT: v_mov_b32_e32 v3, 0x3284fbcf @@ -2175,16 +2214,16 @@ define float @v_log10_f32(float %in) { ; VI-GISEL: ; %bb.0: ; VI-GISEL-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) ; VI-GISEL-NEXT: v_mov_b32_e32 v1, 0x800000 -; VI-GISEL-NEXT: v_mov_b32_e32 v2, 0x4f800000 ; VI-GISEL-NEXT: v_cmp_lt_f32_e32 vcc, v0, v1 -; VI-GISEL-NEXT: v_cndmask_b32_e32 v1, 1.0, v2, vcc -; VI-GISEL-NEXT: v_mul_f32_e32 v0, v0, v1 +; VI-GISEL-NEXT: v_cndmask_b32_e64 v1, 0, 1, vcc +; VI-GISEL-NEXT: v_lshlrev_b32_e32 v1, 5, v1 +; VI-GISEL-NEXT: v_ldexp_f32 v0, v0, v1 ; VI-GISEL-NEXT: v_log_f32_e32 v0, v0 ; VI-GISEL-NEXT: v_and_b32_e32 v1, 0xfffff000, v0 ; VI-GISEL-NEXT: v_sub_f32_e32 v2, v0, v1 -; VI-GISEL-NEXT: v_mul_f32_e32 v3, 0x369a84fb, v2 -; VI-GISEL-NEXT: v_mul_f32_e32 v4, 0x369a84fb, v1 -; VI-GISEL-NEXT: v_add_f32_e32 v3, v4, v3 +; VI-GISEL-NEXT: v_mul_f32_e32 v3, 0x369a84fb, v1 +; VI-GISEL-NEXT: v_mul_f32_e32 v4, 0x369a84fb, v2 +; VI-GISEL-NEXT: v_add_f32_e32 v3, v3, v4 ; VI-GISEL-NEXT: v_mul_f32_e32 v2, 0x3e9a2000, v2 ; VI-GISEL-NEXT: 
v_add_f32_e32 v2, v2, v3 ; VI-GISEL-NEXT: v_mul_f32_e32 v1, 0x3e9a2000, v1 @@ -2224,10 +2263,10 @@ define float @v_log10_f32(float %in) { ; GFX900-GISEL: ; %bb.0: ; GFX900-GISEL-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) ; GFX900-GISEL-NEXT: v_mov_b32_e32 v1, 0x800000 -; GFX900-GISEL-NEXT: v_mov_b32_e32 v2, 0x4f800000 ; GFX900-GISEL-NEXT: v_cmp_lt_f32_e32 vcc, v0, v1 -; GFX900-GISEL-NEXT: v_cndmask_b32_e32 v1, 1.0, v2, vcc -; GFX900-GISEL-NEXT: v_mul_f32_e32 v0, v0, v1 +; GFX900-GISEL-NEXT: v_cndmask_b32_e64 v1, 0, 1, vcc +; GFX900-GISEL-NEXT: v_lshlrev_b32_e32 v1, 5, v1 +; GFX900-GISEL-NEXT: v_ldexp_f32 v0, v0, v1 ; GFX900-GISEL-NEXT: v_log_f32_e32 v0, v0 ; GFX900-GISEL-NEXT: v_mov_b32_e32 v1, 0x3e9a209a ; GFX900-GISEL-NEXT: v_mov_b32_e32 v3, 0x3284fbcf @@ -2270,21 +2309,22 @@ define float @v_log10_f32(float %in) { ; GFX1100-GISEL: ; %bb.0: ; GFX1100-GISEL-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) ; GFX1100-GISEL-NEXT: v_cmp_gt_f32_e32 vcc_lo, 0x800000, v0 -; GFX1100-GISEL-NEXT: v_cndmask_b32_e64 v1, 1.0, 0x4f800000, vcc_lo +; GFX1100-GISEL-NEXT: v_cndmask_b32_e64 v1, 0, 1, vcc_lo ; GFX1100-GISEL-NEXT: s_delay_alu instid0(VALU_DEP_1) | instskip(NEXT) | instid1(VALU_DEP_1) -; GFX1100-GISEL-NEXT: v_mul_f32_e32 v0, v0, v1 +; GFX1100-GISEL-NEXT: v_lshlrev_b32_e32 v1, 5, v1 +; GFX1100-GISEL-NEXT: v_ldexp_f32 v0, v0, v1 +; GFX1100-GISEL-NEXT: s_delay_alu instid0(VALU_DEP_1) | instskip(SKIP_3) | instid1(VALU_DEP_2) ; GFX1100-GISEL-NEXT: v_log_f32_e32 v0, v0 ; GFX1100-GISEL-NEXT: s_waitcnt_depctr 0xfff ; GFX1100-GISEL-NEXT: v_mul_f32_e32 v1, 0x3e9a209a, v0 ; GFX1100-GISEL-NEXT: v_cmp_gt_f32_e64 s0, 0x7f800000, |v0| -; GFX1100-GISEL-NEXT: s_delay_alu instid0(VALU_DEP_2) | instskip(NEXT) | instid1(VALU_DEP_1) ; GFX1100-GISEL-NEXT: v_fma_f32 v2, 0x3e9a209a, v0, -v1 -; GFX1100-GISEL-NEXT: v_fmac_f32_e32 v2, 0x3284fbcf, v0 ; GFX1100-GISEL-NEXT: s_delay_alu instid0(VALU_DEP_1) | instskip(NEXT) | instid1(VALU_DEP_1) +; GFX1100-GISEL-NEXT: v_fmac_f32_e32 v2, 0x3284fbcf, v0 
; GFX1100-GISEL-NEXT: v_add_f32_e32 v1, v1, v2 +; GFX1100-GISEL-NEXT: s_delay_alu instid0(VALU_DEP_1) | instskip(SKIP_1) | instid1(VALU_DEP_1) ; GFX1100-GISEL-NEXT: v_cndmask_b32_e64 v0, v0, v1, s0 ; GFX1100-GISEL-NEXT: v_cndmask_b32_e64 v1, 0, 0x411a209b, vcc_lo -; GFX1100-GISEL-NEXT: s_delay_alu instid0(VALU_DEP_1) ; GFX1100-GISEL-NEXT: v_sub_f32_e32 v0, v0, v1 ; GFX1100-GISEL-NEXT: s_setpc_b64 s[30:31] ; @@ -2329,10 +2369,10 @@ define float @v_log10_fabs_f32(float %in) { ; SI-GISEL: ; %bb.0: ; SI-GISEL-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) ; SI-GISEL-NEXT: v_mov_b32_e32 v1, 0x800000 -; SI-GISEL-NEXT: v_mov_b32_e32 v2, 0x4f800000 ; SI-GISEL-NEXT: v_cmp_lt_f32_e64 vcc, |v0|, v1 -; SI-GISEL-NEXT: v_cndmask_b32_e32 v1, 1.0, v2, vcc -; SI-GISEL-NEXT: v_mul_f32_e64 v0, |v0|, v1 +; SI-GISEL-NEXT: v_cndmask_b32_e64 v1, 0, 1, vcc +; SI-GISEL-NEXT: v_lshlrev_b32_e32 v1, 5, v1 +; SI-GISEL-NEXT: v_ldexp_f32_e64 v0, |v0|, v1 ; SI-GISEL-NEXT: v_log_f32_e32 v0, v0 ; SI-GISEL-NEXT: v_mov_b32_e32 v1, 0x3e9a209a ; SI-GISEL-NEXT: v_mov_b32_e32 v3, 0x3284fbcf @@ -2378,16 +2418,16 @@ define float @v_log10_fabs_f32(float %in) { ; VI-GISEL: ; %bb.0: ; VI-GISEL-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) ; VI-GISEL-NEXT: v_mov_b32_e32 v1, 0x800000 -; VI-GISEL-NEXT: v_mov_b32_e32 v2, 0x4f800000 ; VI-GISEL-NEXT: v_cmp_lt_f32_e64 vcc, |v0|, v1 -; VI-GISEL-NEXT: v_cndmask_b32_e32 v1, 1.0, v2, vcc -; VI-GISEL-NEXT: v_mul_f32_e64 v0, |v0|, v1 +; VI-GISEL-NEXT: v_cndmask_b32_e64 v1, 0, 1, vcc +; VI-GISEL-NEXT: v_lshlrev_b32_e32 v1, 5, v1 +; VI-GISEL-NEXT: v_ldexp_f32 v0, |v0|, v1 ; VI-GISEL-NEXT: v_log_f32_e32 v0, v0 ; VI-GISEL-NEXT: v_and_b32_e32 v1, 0xfffff000, v0 ; VI-GISEL-NEXT: v_sub_f32_e32 v2, v0, v1 -; VI-GISEL-NEXT: v_mul_f32_e32 v3, 0x369a84fb, v2 -; VI-GISEL-NEXT: v_mul_f32_e32 v4, 0x369a84fb, v1 -; VI-GISEL-NEXT: v_add_f32_e32 v3, v4, v3 +; VI-GISEL-NEXT: v_mul_f32_e32 v3, 0x369a84fb, v1 +; VI-GISEL-NEXT: v_mul_f32_e32 v4, 0x369a84fb, v2 +; VI-GISEL-NEXT: v_add_f32_e32 
v3, v3, v4 ; VI-GISEL-NEXT: v_mul_f32_e32 v2, 0x3e9a2000, v2 ; VI-GISEL-NEXT: v_add_f32_e32 v2, v2, v3 ; VI-GISEL-NEXT: v_mul_f32_e32 v1, 0x3e9a2000, v1 @@ -2427,10 +2467,10 @@ define float @v_log10_fabs_f32(float %in) { ; GFX900-GISEL: ; %bb.0: ; GFX900-GISEL-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) ; GFX900-GISEL-NEXT: v_mov_b32_e32 v1, 0x800000 -; GFX900-GISEL-NEXT: v_mov_b32_e32 v2, 0x4f800000 ; GFX900-GISEL-NEXT: v_cmp_lt_f32_e64 vcc, |v0|, v1 -; GFX900-GISEL-NEXT: v_cndmask_b32_e32 v1, 1.0, v2, vcc -; GFX900-GISEL-NEXT: v_mul_f32_e64 v0, |v0|, v1 +; GFX900-GISEL-NEXT: v_cndmask_b32_e64 v1, 0, 1, vcc +; GFX900-GISEL-NEXT: v_lshlrev_b32_e32 v1, 5, v1 +; GFX900-GISEL-NEXT: v_ldexp_f32 v0, |v0|, v1 ; GFX900-GISEL-NEXT: v_log_f32_e32 v0, v0 ; GFX900-GISEL-NEXT: v_mov_b32_e32 v1, 0x3e9a209a ; GFX900-GISEL-NEXT: v_mov_b32_e32 v3, 0x3284fbcf @@ -2475,20 +2515,22 @@ define float @v_log10_fabs_f32(float %in) { ; GFX1100-GISEL-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) ; GFX1100-GISEL-NEXT: v_cmp_gt_f32_e64 s0, 0x800000, |v0| ; GFX1100-GISEL-NEXT: s_delay_alu instid0(VALU_DEP_1) | instskip(NEXT) | instid1(VALU_DEP_1) -; GFX1100-GISEL-NEXT: v_cndmask_b32_e64 v1, 1.0, 0x4f800000, s0 -; GFX1100-GISEL-NEXT: v_mul_f32_e64 v0, |v0|, v1 -; GFX1100-GISEL-NEXT: s_delay_alu instid0(VALU_DEP_1) | instskip(SKIP_3) | instid1(VALU_DEP_2) +; GFX1100-GISEL-NEXT: v_cndmask_b32_e64 v1, 0, 1, s0 +; GFX1100-GISEL-NEXT: v_lshlrev_b32_e32 v1, 5, v1 +; GFX1100-GISEL-NEXT: s_delay_alu instid0(VALU_DEP_1) | instskip(NEXT) | instid1(VALU_DEP_1) +; GFX1100-GISEL-NEXT: v_ldexp_f32 v0, |v0|, v1 ; GFX1100-GISEL-NEXT: v_log_f32_e32 v0, v0 ; GFX1100-GISEL-NEXT: s_waitcnt_depctr 0xfff ; GFX1100-GISEL-NEXT: v_mul_f32_e32 v1, 0x3e9a209a, v0 ; GFX1100-GISEL-NEXT: v_cmp_gt_f32_e64 vcc_lo, 0x7f800000, |v0| +; GFX1100-GISEL-NEXT: s_delay_alu instid0(VALU_DEP_2) | instskip(NEXT) | instid1(VALU_DEP_1) ; GFX1100-GISEL-NEXT: v_fma_f32 v2, 0x3e9a209a, v0, -v1 -; GFX1100-GISEL-NEXT: s_delay_alu 
instid0(VALU_DEP_1) | instskip(NEXT) | instid1(VALU_DEP_1) ; GFX1100-GISEL-NEXT: v_fmac_f32_e32 v2, 0x3284fbcf, v0 +; GFX1100-GISEL-NEXT: s_delay_alu instid0(VALU_DEP_1) | instskip(NEXT) | instid1(VALU_DEP_1) ; GFX1100-GISEL-NEXT: v_add_f32_e32 v1, v1, v2 -; GFX1100-GISEL-NEXT: s_delay_alu instid0(VALU_DEP_1) | instskip(SKIP_1) | instid1(VALU_DEP_1) ; GFX1100-GISEL-NEXT: v_cndmask_b32_e32 v0, v0, v1, vcc_lo ; GFX1100-GISEL-NEXT: v_cndmask_b32_e64 v1, 0, 0x411a209b, s0 +; GFX1100-GISEL-NEXT: s_delay_alu instid0(VALU_DEP_1) ; GFX1100-GISEL-NEXT: v_sub_f32_e32 v0, v0, v1 ; GFX1100-GISEL-NEXT: s_setpc_b64 s[30:31] ; @@ -2534,10 +2576,10 @@ define float @v_log10_fneg_fabs_f32(float %in) { ; SI-GISEL: ; %bb.0: ; SI-GISEL-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) ; SI-GISEL-NEXT: v_mov_b32_e32 v1, 0x800000 -; SI-GISEL-NEXT: v_mov_b32_e32 v2, 0x4f800000 ; SI-GISEL-NEXT: v_cmp_lt_f32_e64 vcc, -|v0|, v1 -; SI-GISEL-NEXT: v_cndmask_b32_e32 v1, 1.0, v2, vcc -; SI-GISEL-NEXT: v_mul_f32_e64 v0, -|v0|, v1 +; SI-GISEL-NEXT: v_cndmask_b32_e64 v1, 0, 1, vcc +; SI-GISEL-NEXT: v_lshlrev_b32_e32 v1, 5, v1 +; SI-GISEL-NEXT: v_ldexp_f32_e64 v0, -|v0|, v1 ; SI-GISEL-NEXT: v_log_f32_e32 v0, v0 ; SI-GISEL-NEXT: v_mov_b32_e32 v1, 0x3e9a209a ; SI-GISEL-NEXT: v_mov_b32_e32 v3, 0x3284fbcf @@ -2583,16 +2625,16 @@ define float @v_log10_fneg_fabs_f32(float %in) { ; VI-GISEL: ; %bb.0: ; VI-GISEL-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) ; VI-GISEL-NEXT: v_mov_b32_e32 v1, 0x800000 -; VI-GISEL-NEXT: v_mov_b32_e32 v2, 0x4f800000 ; VI-GISEL-NEXT: v_cmp_lt_f32_e64 vcc, -|v0|, v1 -; VI-GISEL-NEXT: v_cndmask_b32_e32 v1, 1.0, v2, vcc -; VI-GISEL-NEXT: v_mul_f32_e64 v0, -|v0|, v1 +; VI-GISEL-NEXT: v_cndmask_b32_e64 v1, 0, 1, vcc +; VI-GISEL-NEXT: v_lshlrev_b32_e32 v1, 5, v1 +; VI-GISEL-NEXT: v_ldexp_f32 v0, -|v0|, v1 ; VI-GISEL-NEXT: v_log_f32_e32 v0, v0 ; VI-GISEL-NEXT: v_and_b32_e32 v1, 0xfffff000, v0 ; VI-GISEL-NEXT: v_sub_f32_e32 v2, v0, v1 -; VI-GISEL-NEXT: v_mul_f32_e32 v3, 0x369a84fb, v2 -; 
VI-GISEL-NEXT: v_mul_f32_e32 v4, 0x369a84fb, v1 -; VI-GISEL-NEXT: v_add_f32_e32 v3, v4, v3 +; VI-GISEL-NEXT: v_mul_f32_e32 v3, 0x369a84fb, v1 +; VI-GISEL-NEXT: v_mul_f32_e32 v4, 0x369a84fb, v2 +; VI-GISEL-NEXT: v_add_f32_e32 v3, v3, v4 ; VI-GISEL-NEXT: v_mul_f32_e32 v2, 0x3e9a2000, v2 ; VI-GISEL-NEXT: v_add_f32_e32 v2, v2, v3 ; VI-GISEL-NEXT: v_mul_f32_e32 v1, 0x3e9a2000, v1 @@ -2632,10 +2674,10 @@ define float @v_log10_fneg_fabs_f32(float %in) { ; GFX900-GISEL: ; %bb.0: ; GFX900-GISEL-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) ; GFX900-GISEL-NEXT: v_mov_b32_e32 v1, 0x800000 -; GFX900-GISEL-NEXT: v_mov_b32_e32 v2, 0x4f800000 ; GFX900-GISEL-NEXT: v_cmp_lt_f32_e64 vcc, -|v0|, v1 -; GFX900-GISEL-NEXT: v_cndmask_b32_e32 v1, 1.0, v2, vcc -; GFX900-GISEL-NEXT: v_mul_f32_e64 v0, -|v0|, v1 +; GFX900-GISEL-NEXT: v_cndmask_b32_e64 v1, 0, 1, vcc +; GFX900-GISEL-NEXT: v_lshlrev_b32_e32 v1, 5, v1 +; GFX900-GISEL-NEXT: v_ldexp_f32 v0, -|v0|, v1 ; GFX900-GISEL-NEXT: v_log_f32_e32 v0, v0 ; GFX900-GISEL-NEXT: v_mov_b32_e32 v1, 0x3e9a209a ; GFX900-GISEL-NEXT: v_mov_b32_e32 v3, 0x3284fbcf @@ -2680,20 +2722,22 @@ define float @v_log10_fneg_fabs_f32(float %in) { ; GFX1100-GISEL-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) ; GFX1100-GISEL-NEXT: v_cmp_gt_f32_e64 s0, 0x800000, -|v0| ; GFX1100-GISEL-NEXT: s_delay_alu instid0(VALU_DEP_1) | instskip(NEXT) | instid1(VALU_DEP_1) -; GFX1100-GISEL-NEXT: v_cndmask_b32_e64 v1, 1.0, 0x4f800000, s0 -; GFX1100-GISEL-NEXT: v_mul_f32_e64 v0, -|v0|, v1 -; GFX1100-GISEL-NEXT: s_delay_alu instid0(VALU_DEP_1) | instskip(SKIP_3) | instid1(VALU_DEP_2) +; GFX1100-GISEL-NEXT: v_cndmask_b32_e64 v1, 0, 1, s0 +; GFX1100-GISEL-NEXT: v_lshlrev_b32_e32 v1, 5, v1 +; GFX1100-GISEL-NEXT: s_delay_alu instid0(VALU_DEP_1) | instskip(NEXT) | instid1(VALU_DEP_1) +; GFX1100-GISEL-NEXT: v_ldexp_f32 v0, -|v0|, v1 ; GFX1100-GISEL-NEXT: v_log_f32_e32 v0, v0 ; GFX1100-GISEL-NEXT: s_waitcnt_depctr 0xfff ; GFX1100-GISEL-NEXT: v_mul_f32_e32 v1, 0x3e9a209a, v0 ; 
GFX1100-GISEL-NEXT: v_cmp_gt_f32_e64 vcc_lo, 0x7f800000, |v0| +; GFX1100-GISEL-NEXT: s_delay_alu instid0(VALU_DEP_2) | instskip(NEXT) | instid1(VALU_DEP_1) ; GFX1100-GISEL-NEXT: v_fma_f32 v2, 0x3e9a209a, v0, -v1 -; GFX1100-GISEL-NEXT: s_delay_alu instid0(VALU_DEP_1) | instskip(NEXT) | instid1(VALU_DEP_1) ; GFX1100-GISEL-NEXT: v_fmac_f32_e32 v2, 0x3284fbcf, v0 +; GFX1100-GISEL-NEXT: s_delay_alu instid0(VALU_DEP_1) | instskip(NEXT) | instid1(VALU_DEP_1) ; GFX1100-GISEL-NEXT: v_add_f32_e32 v1, v1, v2 -; GFX1100-GISEL-NEXT: s_delay_alu instid0(VALU_DEP_1) | instskip(SKIP_1) | instid1(VALU_DEP_1) ; GFX1100-GISEL-NEXT: v_cndmask_b32_e32 v0, v0, v1, vcc_lo ; GFX1100-GISEL-NEXT: v_cndmask_b32_e64 v1, 0, 0x411a209b, s0 +; GFX1100-GISEL-NEXT: s_delay_alu instid0(VALU_DEP_1) ; GFX1100-GISEL-NEXT: v_sub_f32_e32 v0, v0, v1 ; GFX1100-GISEL-NEXT: s_setpc_b64 s[30:31] ; @@ -2740,10 +2784,10 @@ define float @v_log10_fneg_f32(float %in) { ; SI-GISEL: ; %bb.0: ; SI-GISEL-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) ; SI-GISEL-NEXT: v_mov_b32_e32 v1, 0x800000 -; SI-GISEL-NEXT: v_mov_b32_e32 v2, 0x4f800000 ; SI-GISEL-NEXT: v_cmp_lt_f32_e64 vcc, -v0, v1 -; SI-GISEL-NEXT: v_cndmask_b32_e32 v1, 1.0, v2, vcc -; SI-GISEL-NEXT: v_mul_f32_e64 v0, -v0, v1 +; SI-GISEL-NEXT: v_cndmask_b32_e64 v1, 0, 1, vcc +; SI-GISEL-NEXT: v_lshlrev_b32_e32 v1, 5, v1 +; SI-GISEL-NEXT: v_ldexp_f32_e64 v0, -v0, v1 ; SI-GISEL-NEXT: v_log_f32_e32 v0, v0 ; SI-GISEL-NEXT: v_mov_b32_e32 v1, 0x3e9a209a ; SI-GISEL-NEXT: v_mov_b32_e32 v3, 0x3284fbcf @@ -2789,16 +2833,16 @@ define float @v_log10_fneg_f32(float %in) { ; VI-GISEL: ; %bb.0: ; VI-GISEL-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) ; VI-GISEL-NEXT: v_mov_b32_e32 v1, 0x800000 -; VI-GISEL-NEXT: v_mov_b32_e32 v2, 0x4f800000 ; VI-GISEL-NEXT: v_cmp_lt_f32_e64 vcc, -v0, v1 -; VI-GISEL-NEXT: v_cndmask_b32_e32 v1, 1.0, v2, vcc -; VI-GISEL-NEXT: v_mul_f32_e64 v0, -v0, v1 +; VI-GISEL-NEXT: v_cndmask_b32_e64 v1, 0, 1, vcc +; VI-GISEL-NEXT: v_lshlrev_b32_e32 v1, 5, v1 +; 
VI-GISEL-NEXT: v_ldexp_f32 v0, -v0, v1 ; VI-GISEL-NEXT: v_log_f32_e32 v0, v0 ; VI-GISEL-NEXT: v_and_b32_e32 v1, 0xfffff000, v0 ; VI-GISEL-NEXT: v_sub_f32_e32 v2, v0, v1 -; VI-GISEL-NEXT: v_mul_f32_e32 v3, 0x369a84fb, v2 -; VI-GISEL-NEXT: v_mul_f32_e32 v4, 0x369a84fb, v1 -; VI-GISEL-NEXT: v_add_f32_e32 v3, v4, v3 +; VI-GISEL-NEXT: v_mul_f32_e32 v3, 0x369a84fb, v1 +; VI-GISEL-NEXT: v_mul_f32_e32 v4, 0x369a84fb, v2 +; VI-GISEL-NEXT: v_add_f32_e32 v3, v3, v4 ; VI-GISEL-NEXT: v_mul_f32_e32 v2, 0x3e9a2000, v2 ; VI-GISEL-NEXT: v_add_f32_e32 v2, v2, v3 ; VI-GISEL-NEXT: v_mul_f32_e32 v1, 0x3e9a2000, v1 @@ -2838,10 +2882,10 @@ define float @v_log10_fneg_f32(float %in) { ; GFX900-GISEL: ; %bb.0: ; GFX900-GISEL-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) ; GFX900-GISEL-NEXT: v_mov_b32_e32 v1, 0x800000 -; GFX900-GISEL-NEXT: v_mov_b32_e32 v2, 0x4f800000 ; GFX900-GISEL-NEXT: v_cmp_lt_f32_e64 vcc, -v0, v1 -; GFX900-GISEL-NEXT: v_cndmask_b32_e32 v1, 1.0, v2, vcc -; GFX900-GISEL-NEXT: v_mul_f32_e64 v0, -v0, v1 +; GFX900-GISEL-NEXT: v_cndmask_b32_e64 v1, 0, 1, vcc +; GFX900-GISEL-NEXT: v_lshlrev_b32_e32 v1, 5, v1 +; GFX900-GISEL-NEXT: v_ldexp_f32 v0, -v0, v1 ; GFX900-GISEL-NEXT: v_log_f32_e32 v0, v0 ; GFX900-GISEL-NEXT: v_mov_b32_e32 v1, 0x3e9a209a ; GFX900-GISEL-NEXT: v_mov_b32_e32 v3, 0x3284fbcf @@ -2885,20 +2929,22 @@ define float @v_log10_fneg_f32(float %in) { ; GFX1100-GISEL-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) ; GFX1100-GISEL-NEXT: v_cmp_gt_f32_e64 s0, 0x800000, -v0 ; GFX1100-GISEL-NEXT: s_delay_alu instid0(VALU_DEP_1) | instskip(NEXT) | instid1(VALU_DEP_1) -; GFX1100-GISEL-NEXT: v_cndmask_b32_e64 v1, 1.0, 0x4f800000, s0 -; GFX1100-GISEL-NEXT: v_mul_f32_e64 v0, -v0, v1 -; GFX1100-GISEL-NEXT: s_delay_alu instid0(VALU_DEP_1) | instskip(SKIP_3) | instid1(VALU_DEP_2) +; GFX1100-GISEL-NEXT: v_cndmask_b32_e64 v1, 0, 1, s0 +; GFX1100-GISEL-NEXT: v_lshlrev_b32_e32 v1, 5, v1 +; GFX1100-GISEL-NEXT: s_delay_alu instid0(VALU_DEP_1) | instskip(NEXT) | instid1(VALU_DEP_1) +; 
GFX1100-GISEL-NEXT: v_ldexp_f32 v0, -v0, v1 ; GFX1100-GISEL-NEXT: v_log_f32_e32 v0, v0 ; GFX1100-GISEL-NEXT: s_waitcnt_depctr 0xfff ; GFX1100-GISEL-NEXT: v_mul_f32_e32 v1, 0x3e9a209a, v0 ; GFX1100-GISEL-NEXT: v_cmp_gt_f32_e64 vcc_lo, 0x7f800000, |v0| +; GFX1100-GISEL-NEXT: s_delay_alu instid0(VALU_DEP_2) | instskip(NEXT) | instid1(VALU_DEP_1) ; GFX1100-GISEL-NEXT: v_fma_f32 v2, 0x3e9a209a, v0, -v1 -; GFX1100-GISEL-NEXT: s_delay_alu instid0(VALU_DEP_1) | instskip(NEXT) | instid1(VALU_DEP_1) ; GFX1100-GISEL-NEXT: v_fmac_f32_e32 v2, 0x3284fbcf, v0 +; GFX1100-GISEL-NEXT: s_delay_alu instid0(VALU_DEP_1) | instskip(NEXT) | instid1(VALU_DEP_1) ; GFX1100-GISEL-NEXT: v_add_f32_e32 v1, v1, v2 -; GFX1100-GISEL-NEXT: s_delay_alu instid0(VALU_DEP_1) | instskip(SKIP_1) | instid1(VALU_DEP_1) ; GFX1100-GISEL-NEXT: v_cndmask_b32_e32 v0, v0, v1, vcc_lo ; GFX1100-GISEL-NEXT: v_cndmask_b32_e64 v1, 0, 0x411a209b, s0 +; GFX1100-GISEL-NEXT: s_delay_alu instid0(VALU_DEP_1) ; GFX1100-GISEL-NEXT: v_sub_f32_e32 v0, v0, v1 ; GFX1100-GISEL-NEXT: s_setpc_b64 s[30:31] ; @@ -3304,10 +3350,10 @@ define float @v_log10_f32_ninf(float %in) { ; SI-GISEL: ; %bb.0: ; SI-GISEL-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) ; SI-GISEL-NEXT: v_mov_b32_e32 v1, 0x800000 -; SI-GISEL-NEXT: v_mov_b32_e32 v2, 0x4f800000 ; SI-GISEL-NEXT: v_cmp_lt_f32_e32 vcc, v0, v1 -; SI-GISEL-NEXT: v_cndmask_b32_e32 v1, 1.0, v2, vcc -; SI-GISEL-NEXT: v_mul_f32_e32 v0, v0, v1 +; SI-GISEL-NEXT: v_cndmask_b32_e64 v1, 0, 1, vcc +; SI-GISEL-NEXT: v_lshlrev_b32_e32 v1, 5, v1 +; SI-GISEL-NEXT: v_ldexp_f32_e32 v0, v0, v1 ; SI-GISEL-NEXT: v_log_f32_e32 v0, v0 ; SI-GISEL-NEXT: v_mov_b32_e32 v1, 0x3e9a209a ; SI-GISEL-NEXT: v_mov_b32_e32 v3, 0x3284fbcf @@ -3353,16 +3399,16 @@ define float @v_log10_f32_ninf(float %in) { ; VI-GISEL: ; %bb.0: ; VI-GISEL-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) ; VI-GISEL-NEXT: v_mov_b32_e32 v1, 0x800000 -; VI-GISEL-NEXT: v_mov_b32_e32 v2, 0x4f800000 ; VI-GISEL-NEXT: v_cmp_lt_f32_e32 vcc, v0, v1 -; 
VI-GISEL-NEXT: v_cndmask_b32_e32 v1, 1.0, v2, vcc -; VI-GISEL-NEXT: v_mul_f32_e32 v0, v0, v1 +; VI-GISEL-NEXT: v_cndmask_b32_e64 v1, 0, 1, vcc +; VI-GISEL-NEXT: v_lshlrev_b32_e32 v1, 5, v1 +; VI-GISEL-NEXT: v_ldexp_f32 v0, v0, v1 ; VI-GISEL-NEXT: v_log_f32_e32 v0, v0 ; VI-GISEL-NEXT: v_and_b32_e32 v1, 0xfffff000, v0 ; VI-GISEL-NEXT: v_sub_f32_e32 v2, v0, v1 -; VI-GISEL-NEXT: v_mul_f32_e32 v3, 0x369a84fb, v2 -; VI-GISEL-NEXT: v_mul_f32_e32 v4, 0x369a84fb, v1 -; VI-GISEL-NEXT: v_add_f32_e32 v3, v4, v3 +; VI-GISEL-NEXT: v_mul_f32_e32 v3, 0x369a84fb, v1 +; VI-GISEL-NEXT: v_mul_f32_e32 v4, 0x369a84fb, v2 +; VI-GISEL-NEXT: v_add_f32_e32 v3, v3, v4 ; VI-GISEL-NEXT: v_mul_f32_e32 v2, 0x3e9a2000, v2 ; VI-GISEL-NEXT: v_add_f32_e32 v2, v2, v3 ; VI-GISEL-NEXT: v_mul_f32_e32 v1, 0x3e9a2000, v1 @@ -3402,10 +3448,10 @@ define float @v_log10_f32_ninf(float %in) { ; GFX900-GISEL: ; %bb.0: ; GFX900-GISEL-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) ; GFX900-GISEL-NEXT: v_mov_b32_e32 v1, 0x800000 -; GFX900-GISEL-NEXT: v_mov_b32_e32 v2, 0x4f800000 ; GFX900-GISEL-NEXT: v_cmp_lt_f32_e32 vcc, v0, v1 -; GFX900-GISEL-NEXT: v_cndmask_b32_e32 v1, 1.0, v2, vcc -; GFX900-GISEL-NEXT: v_mul_f32_e32 v0, v0, v1 +; GFX900-GISEL-NEXT: v_cndmask_b32_e64 v1, 0, 1, vcc +; GFX900-GISEL-NEXT: v_lshlrev_b32_e32 v1, 5, v1 +; GFX900-GISEL-NEXT: v_ldexp_f32 v0, v0, v1 ; GFX900-GISEL-NEXT: v_log_f32_e32 v0, v0 ; GFX900-GISEL-NEXT: v_mov_b32_e32 v1, 0x3e9a209a ; GFX900-GISEL-NEXT: v_mov_b32_e32 v3, 0x3284fbcf @@ -3448,21 +3494,22 @@ define float @v_log10_f32_ninf(float %in) { ; GFX1100-GISEL: ; %bb.0: ; GFX1100-GISEL-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) ; GFX1100-GISEL-NEXT: v_cmp_gt_f32_e32 vcc_lo, 0x800000, v0 -; GFX1100-GISEL-NEXT: v_cndmask_b32_e64 v1, 1.0, 0x4f800000, vcc_lo +; GFX1100-GISEL-NEXT: v_cndmask_b32_e64 v1, 0, 1, vcc_lo ; GFX1100-GISEL-NEXT: s_delay_alu instid0(VALU_DEP_1) | instskip(NEXT) | instid1(VALU_DEP_1) -; GFX1100-GISEL-NEXT: v_mul_f32_e32 v0, v0, v1 +; GFX1100-GISEL-NEXT: 
v_lshlrev_b32_e32 v1, 5, v1 +; GFX1100-GISEL-NEXT: v_ldexp_f32 v0, v0, v1 +; GFX1100-GISEL-NEXT: s_delay_alu instid0(VALU_DEP_1) | instskip(SKIP_3) | instid1(VALU_DEP_2) ; GFX1100-GISEL-NEXT: v_log_f32_e32 v0, v0 ; GFX1100-GISEL-NEXT: s_waitcnt_depctr 0xfff ; GFX1100-GISEL-NEXT: v_mul_f32_e32 v1, 0x3e9a209a, v0 ; GFX1100-GISEL-NEXT: v_cmp_gt_f32_e64 s0, 0x7f800000, |v0| -; GFX1100-GISEL-NEXT: s_delay_alu instid0(VALU_DEP_2) | instskip(NEXT) | instid1(VALU_DEP_1) ; GFX1100-GISEL-NEXT: v_fma_f32 v2, 0x3e9a209a, v0, -v1 -; GFX1100-GISEL-NEXT: v_fmac_f32_e32 v2, 0x3284fbcf, v0 ; GFX1100-GISEL-NEXT: s_delay_alu instid0(VALU_DEP_1) | instskip(NEXT) | instid1(VALU_DEP_1) +; GFX1100-GISEL-NEXT: v_fmac_f32_e32 v2, 0x3284fbcf, v0 ; GFX1100-GISEL-NEXT: v_add_f32_e32 v1, v1, v2 +; GFX1100-GISEL-NEXT: s_delay_alu instid0(VALU_DEP_1) | instskip(SKIP_1) | instid1(VALU_DEP_1) ; GFX1100-GISEL-NEXT: v_cndmask_b32_e64 v0, v0, v1, s0 ; GFX1100-GISEL-NEXT: v_cndmask_b32_e64 v1, 0, 0x411a209b, vcc_lo -; GFX1100-GISEL-NEXT: s_delay_alu instid0(VALU_DEP_1) ; GFX1100-GISEL-NEXT: v_sub_f32_e32 v0, v0, v1 ; GFX1100-GISEL-NEXT: s_setpc_b64 s[30:31] ; @@ -4038,10 +4085,10 @@ define float @v_log10_f32_nnan(float %in) { ; SI-GISEL: ; %bb.0: ; SI-GISEL-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) ; SI-GISEL-NEXT: v_mov_b32_e32 v1, 0x800000 -; SI-GISEL-NEXT: v_mov_b32_e32 v2, 0x4f800000 ; SI-GISEL-NEXT: v_cmp_lt_f32_e32 vcc, v0, v1 -; SI-GISEL-NEXT: v_cndmask_b32_e32 v1, 1.0, v2, vcc -; SI-GISEL-NEXT: v_mul_f32_e32 v0, v0, v1 +; SI-GISEL-NEXT: v_cndmask_b32_e64 v1, 0, 1, vcc +; SI-GISEL-NEXT: v_lshlrev_b32_e32 v1, 5, v1 +; SI-GISEL-NEXT: v_ldexp_f32_e32 v0, v0, v1 ; SI-GISEL-NEXT: v_log_f32_e32 v0, v0 ; SI-GISEL-NEXT: v_mov_b32_e32 v1, 0x3e9a209a ; SI-GISEL-NEXT: v_mov_b32_e32 v3, 0x3284fbcf @@ -4087,16 +4134,16 @@ define float @v_log10_f32_nnan(float %in) { ; VI-GISEL: ; %bb.0: ; VI-GISEL-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) ; VI-GISEL-NEXT: v_mov_b32_e32 v1, 0x800000 -; VI-GISEL-NEXT: 
v_mov_b32_e32 v2, 0x4f800000 ; VI-GISEL-NEXT: v_cmp_lt_f32_e32 vcc, v0, v1 -; VI-GISEL-NEXT: v_cndmask_b32_e32 v1, 1.0, v2, vcc -; VI-GISEL-NEXT: v_mul_f32_e32 v0, v0, v1 +; VI-GISEL-NEXT: v_cndmask_b32_e64 v1, 0, 1, vcc +; VI-GISEL-NEXT: v_lshlrev_b32_e32 v1, 5, v1 +; VI-GISEL-NEXT: v_ldexp_f32 v0, v0, v1 ; VI-GISEL-NEXT: v_log_f32_e32 v0, v0 ; VI-GISEL-NEXT: v_and_b32_e32 v1, 0xfffff000, v0 ; VI-GISEL-NEXT: v_sub_f32_e32 v2, v0, v1 -; VI-GISEL-NEXT: v_mul_f32_e32 v3, 0x369a84fb, v2 -; VI-GISEL-NEXT: v_mul_f32_e32 v4, 0x369a84fb, v1 -; VI-GISEL-NEXT: v_add_f32_e32 v3, v4, v3 +; VI-GISEL-NEXT: v_mul_f32_e32 v3, 0x369a84fb, v1 +; VI-GISEL-NEXT: v_mul_f32_e32 v4, 0x369a84fb, v2 +; VI-GISEL-NEXT: v_add_f32_e32 v3, v3, v4 ; VI-GISEL-NEXT: v_mul_f32_e32 v2, 0x3e9a2000, v2 ; VI-GISEL-NEXT: v_add_f32_e32 v2, v2, v3 ; VI-GISEL-NEXT: v_mul_f32_e32 v1, 0x3e9a2000, v1 @@ -4136,10 +4183,10 @@ define float @v_log10_f32_nnan(float %in) { ; GFX900-GISEL: ; %bb.0: ; GFX900-GISEL-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) ; GFX900-GISEL-NEXT: v_mov_b32_e32 v1, 0x800000 -; GFX900-GISEL-NEXT: v_mov_b32_e32 v2, 0x4f800000 ; GFX900-GISEL-NEXT: v_cmp_lt_f32_e32 vcc, v0, v1 -; GFX900-GISEL-NEXT: v_cndmask_b32_e32 v1, 1.0, v2, vcc -; GFX900-GISEL-NEXT: v_mul_f32_e32 v0, v0, v1 +; GFX900-GISEL-NEXT: v_cndmask_b32_e64 v1, 0, 1, vcc +; GFX900-GISEL-NEXT: v_lshlrev_b32_e32 v1, 5, v1 +; GFX900-GISEL-NEXT: v_ldexp_f32 v0, v0, v1 ; GFX900-GISEL-NEXT: v_log_f32_e32 v0, v0 ; GFX900-GISEL-NEXT: v_mov_b32_e32 v1, 0x3e9a209a ; GFX900-GISEL-NEXT: v_mov_b32_e32 v3, 0x3284fbcf @@ -4182,21 +4229,22 @@ define float @v_log10_f32_nnan(float %in) { ; GFX1100-GISEL: ; %bb.0: ; GFX1100-GISEL-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) ; GFX1100-GISEL-NEXT: v_cmp_gt_f32_e32 vcc_lo, 0x800000, v0 -; GFX1100-GISEL-NEXT: v_cndmask_b32_e64 v1, 1.0, 0x4f800000, vcc_lo +; GFX1100-GISEL-NEXT: v_cndmask_b32_e64 v1, 0, 1, vcc_lo ; GFX1100-GISEL-NEXT: s_delay_alu instid0(VALU_DEP_1) | instskip(NEXT) | 
instid1(VALU_DEP_1) -; GFX1100-GISEL-NEXT: v_mul_f32_e32 v0, v0, v1 +; GFX1100-GISEL-NEXT: v_lshlrev_b32_e32 v1, 5, v1 +; GFX1100-GISEL-NEXT: v_ldexp_f32 v0, v0, v1 +; GFX1100-GISEL-NEXT: s_delay_alu instid0(VALU_DEP_1) | instskip(SKIP_3) | instid1(VALU_DEP_2) ; GFX1100-GISEL-NEXT: v_log_f32_e32 v0, v0 ; GFX1100-GISEL-NEXT: s_waitcnt_depctr 0xfff ; GFX1100-GISEL-NEXT: v_mul_f32_e32 v1, 0x3e9a209a, v0 ; GFX1100-GISEL-NEXT: v_cmp_gt_f32_e64 s0, 0x7f800000, |v0| -; GFX1100-GISEL-NEXT: s_delay_alu instid0(VALU_DEP_2) | instskip(NEXT) | instid1(VALU_DEP_1) ; GFX1100-GISEL-NEXT: v_fma_f32 v2, 0x3e9a209a, v0, -v1 -; GFX1100-GISEL-NEXT: v_fmac_f32_e32 v2, 0x3284fbcf, v0 ; GFX1100-GISEL-NEXT: s_delay_alu instid0(VALU_DEP_1) | instskip(NEXT) | instid1(VALU_DEP_1) +; GFX1100-GISEL-NEXT: v_fmac_f32_e32 v2, 0x3284fbcf, v0 ; GFX1100-GISEL-NEXT: v_add_f32_e32 v1, v1, v2 +; GFX1100-GISEL-NEXT: s_delay_alu instid0(VALU_DEP_1) | instskip(SKIP_1) | instid1(VALU_DEP_1) ; GFX1100-GISEL-NEXT: v_cndmask_b32_e64 v0, v0, v1, s0 ; GFX1100-GISEL-NEXT: v_cndmask_b32_e64 v1, 0, 0x411a209b, vcc_lo -; GFX1100-GISEL-NEXT: s_delay_alu instid0(VALU_DEP_1) ; GFX1100-GISEL-NEXT: v_sub_f32_e32 v0, v0, v1 ; GFX1100-GISEL-NEXT: s_setpc_b64 s[30:31] ; @@ -4381,10 +4429,10 @@ define float @v_log10_f32_nnan_dynamic(float %in) #1 { ; SI-GISEL: ; %bb.0: ; SI-GISEL-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) ; SI-GISEL-NEXT: v_mov_b32_e32 v1, 0x800000 -; SI-GISEL-NEXT: v_mov_b32_e32 v2, 0x4f800000 ; SI-GISEL-NEXT: v_cmp_lt_f32_e32 vcc, v0, v1 -; SI-GISEL-NEXT: v_cndmask_b32_e32 v1, 1.0, v2, vcc -; SI-GISEL-NEXT: v_mul_f32_e32 v0, v0, v1 +; SI-GISEL-NEXT: v_cndmask_b32_e64 v1, 0, 1, vcc +; SI-GISEL-NEXT: v_lshlrev_b32_e32 v1, 5, v1 +; SI-GISEL-NEXT: v_ldexp_f32_e32 v0, v0, v1 ; SI-GISEL-NEXT: v_log_f32_e32 v0, v0 ; SI-GISEL-NEXT: v_mov_b32_e32 v1, 0x3e9a209a ; SI-GISEL-NEXT: v_mov_b32_e32 v3, 0x3284fbcf @@ -4430,16 +4478,16 @@ define float @v_log10_f32_nnan_dynamic(float %in) #1 { ; VI-GISEL: ; %bb.0: ; 
VI-GISEL-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) ; VI-GISEL-NEXT: v_mov_b32_e32 v1, 0x800000 -; VI-GISEL-NEXT: v_mov_b32_e32 v2, 0x4f800000 ; VI-GISEL-NEXT: v_cmp_lt_f32_e32 vcc, v0, v1 -; VI-GISEL-NEXT: v_cndmask_b32_e32 v1, 1.0, v2, vcc -; VI-GISEL-NEXT: v_mul_f32_e32 v0, v0, v1 +; VI-GISEL-NEXT: v_cndmask_b32_e64 v1, 0, 1, vcc +; VI-GISEL-NEXT: v_lshlrev_b32_e32 v1, 5, v1 +; VI-GISEL-NEXT: v_ldexp_f32 v0, v0, v1 ; VI-GISEL-NEXT: v_log_f32_e32 v0, v0 ; VI-GISEL-NEXT: v_and_b32_e32 v1, 0xfffff000, v0 ; VI-GISEL-NEXT: v_sub_f32_e32 v2, v0, v1 -; VI-GISEL-NEXT: v_mul_f32_e32 v3, 0x369a84fb, v2 -; VI-GISEL-NEXT: v_mul_f32_e32 v4, 0x369a84fb, v1 -; VI-GISEL-NEXT: v_add_f32_e32 v3, v4, v3 +; VI-GISEL-NEXT: v_mul_f32_e32 v3, 0x369a84fb, v1 +; VI-GISEL-NEXT: v_mul_f32_e32 v4, 0x369a84fb, v2 +; VI-GISEL-NEXT: v_add_f32_e32 v3, v3, v4 ; VI-GISEL-NEXT: v_mul_f32_e32 v2, 0x3e9a2000, v2 ; VI-GISEL-NEXT: v_add_f32_e32 v2, v2, v3 ; VI-GISEL-NEXT: v_mul_f32_e32 v1, 0x3e9a2000, v1 @@ -4479,10 +4527,10 @@ define float @v_log10_f32_nnan_dynamic(float %in) #1 { ; GFX900-GISEL: ; %bb.0: ; GFX900-GISEL-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) ; GFX900-GISEL-NEXT: v_mov_b32_e32 v1, 0x800000 -; GFX900-GISEL-NEXT: v_mov_b32_e32 v2, 0x4f800000 ; GFX900-GISEL-NEXT: v_cmp_lt_f32_e32 vcc, v0, v1 -; GFX900-GISEL-NEXT: v_cndmask_b32_e32 v1, 1.0, v2, vcc -; GFX900-GISEL-NEXT: v_mul_f32_e32 v0, v0, v1 +; GFX900-GISEL-NEXT: v_cndmask_b32_e64 v1, 0, 1, vcc +; GFX900-GISEL-NEXT: v_lshlrev_b32_e32 v1, 5, v1 +; GFX900-GISEL-NEXT: v_ldexp_f32 v0, v0, v1 ; GFX900-GISEL-NEXT: v_log_f32_e32 v0, v0 ; GFX900-GISEL-NEXT: v_mov_b32_e32 v1, 0x3e9a209a ; GFX900-GISEL-NEXT: v_mov_b32_e32 v3, 0x3284fbcf @@ -4525,21 +4573,22 @@ define float @v_log10_f32_nnan_dynamic(float %in) #1 { ; GFX1100-GISEL: ; %bb.0: ; GFX1100-GISEL-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) ; GFX1100-GISEL-NEXT: v_cmp_gt_f32_e32 vcc_lo, 0x800000, v0 -; GFX1100-GISEL-NEXT: v_cndmask_b32_e64 v1, 1.0, 0x4f800000, vcc_lo +; 
GFX1100-GISEL-NEXT: v_cndmask_b32_e64 v1, 0, 1, vcc_lo ; GFX1100-GISEL-NEXT: s_delay_alu instid0(VALU_DEP_1) | instskip(NEXT) | instid1(VALU_DEP_1) -; GFX1100-GISEL-NEXT: v_mul_f32_e32 v0, v0, v1 +; GFX1100-GISEL-NEXT: v_lshlrev_b32_e32 v1, 5, v1 +; GFX1100-GISEL-NEXT: v_ldexp_f32 v0, v0, v1 +; GFX1100-GISEL-NEXT: s_delay_alu instid0(VALU_DEP_1) | instskip(SKIP_3) | instid1(VALU_DEP_2) ; GFX1100-GISEL-NEXT: v_log_f32_e32 v0, v0 ; GFX1100-GISEL-NEXT: s_waitcnt_depctr 0xfff ; GFX1100-GISEL-NEXT: v_mul_f32_e32 v1, 0x3e9a209a, v0 ; GFX1100-GISEL-NEXT: v_cmp_gt_f32_e64 s0, 0x7f800000, |v0| -; GFX1100-GISEL-NEXT: s_delay_alu instid0(VALU_DEP_2) | instskip(NEXT) | instid1(VALU_DEP_1) ; GFX1100-GISEL-NEXT: v_fma_f32 v2, 0x3e9a209a, v0, -v1 -; GFX1100-GISEL-NEXT: v_fmac_f32_e32 v2, 0x3284fbcf, v0 ; GFX1100-GISEL-NEXT: s_delay_alu instid0(VALU_DEP_1) | instskip(NEXT) | instid1(VALU_DEP_1) +; GFX1100-GISEL-NEXT: v_fmac_f32_e32 v2, 0x3284fbcf, v0 ; GFX1100-GISEL-NEXT: v_add_f32_e32 v1, v1, v2 +; GFX1100-GISEL-NEXT: s_delay_alu instid0(VALU_DEP_1) | instskip(SKIP_1) | instid1(VALU_DEP_1) ; GFX1100-GISEL-NEXT: v_cndmask_b32_e64 v0, v0, v1, s0 ; GFX1100-GISEL-NEXT: v_cndmask_b32_e64 v1, 0, 0x411a209b, vcc_lo -; GFX1100-GISEL-NEXT: s_delay_alu instid0(VALU_DEP_1) ; GFX1100-GISEL-NEXT: v_sub_f32_e32 v0, v0, v1 ; GFX1100-GISEL-NEXT: s_setpc_b64 s[30:31] ; @@ -4724,10 +4773,10 @@ define float @v_log10_f32_ninf_dynamic(float %in) #1 { ; SI-GISEL: ; %bb.0: ; SI-GISEL-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) ; SI-GISEL-NEXT: v_mov_b32_e32 v1, 0x800000 -; SI-GISEL-NEXT: v_mov_b32_e32 v2, 0x4f800000 ; SI-GISEL-NEXT: v_cmp_lt_f32_e32 vcc, v0, v1 -; SI-GISEL-NEXT: v_cndmask_b32_e32 v1, 1.0, v2, vcc -; SI-GISEL-NEXT: v_mul_f32_e32 v0, v0, v1 +; SI-GISEL-NEXT: v_cndmask_b32_e64 v1, 0, 1, vcc +; SI-GISEL-NEXT: v_lshlrev_b32_e32 v1, 5, v1 +; SI-GISEL-NEXT: v_ldexp_f32_e32 v0, v0, v1 ; SI-GISEL-NEXT: v_log_f32_e32 v0, v0 ; SI-GISEL-NEXT: v_mov_b32_e32 v1, 0x3e9a209a ; SI-GISEL-NEXT: 
v_mov_b32_e32 v3, 0x3284fbcf @@ -4773,16 +4822,16 @@ define float @v_log10_f32_ninf_dynamic(float %in) #1 { ; VI-GISEL: ; %bb.0: ; VI-GISEL-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) ; VI-GISEL-NEXT: v_mov_b32_e32 v1, 0x800000 -; VI-GISEL-NEXT: v_mov_b32_e32 v2, 0x4f800000 ; VI-GISEL-NEXT: v_cmp_lt_f32_e32 vcc, v0, v1 -; VI-GISEL-NEXT: v_cndmask_b32_e32 v1, 1.0, v2, vcc -; VI-GISEL-NEXT: v_mul_f32_e32 v0, v0, v1 +; VI-GISEL-NEXT: v_cndmask_b32_e64 v1, 0, 1, vcc +; VI-GISEL-NEXT: v_lshlrev_b32_e32 v1, 5, v1 +; VI-GISEL-NEXT: v_ldexp_f32 v0, v0, v1 ; VI-GISEL-NEXT: v_log_f32_e32 v0, v0 ; VI-GISEL-NEXT: v_and_b32_e32 v1, 0xfffff000, v0 ; VI-GISEL-NEXT: v_sub_f32_e32 v2, v0, v1 -; VI-GISEL-NEXT: v_mul_f32_e32 v3, 0x369a84fb, v2 -; VI-GISEL-NEXT: v_mul_f32_e32 v4, 0x369a84fb, v1 -; VI-GISEL-NEXT: v_add_f32_e32 v3, v4, v3 +; VI-GISEL-NEXT: v_mul_f32_e32 v3, 0x369a84fb, v1 +; VI-GISEL-NEXT: v_mul_f32_e32 v4, 0x369a84fb, v2 +; VI-GISEL-NEXT: v_add_f32_e32 v3, v3, v4 ; VI-GISEL-NEXT: v_mul_f32_e32 v2, 0x3e9a2000, v2 ; VI-GISEL-NEXT: v_add_f32_e32 v2, v2, v3 ; VI-GISEL-NEXT: v_mul_f32_e32 v1, 0x3e9a2000, v1 @@ -4822,10 +4871,10 @@ define float @v_log10_f32_ninf_dynamic(float %in) #1 { ; GFX900-GISEL: ; %bb.0: ; GFX900-GISEL-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) ; GFX900-GISEL-NEXT: v_mov_b32_e32 v1, 0x800000 -; GFX900-GISEL-NEXT: v_mov_b32_e32 v2, 0x4f800000 ; GFX900-GISEL-NEXT: v_cmp_lt_f32_e32 vcc, v0, v1 -; GFX900-GISEL-NEXT: v_cndmask_b32_e32 v1, 1.0, v2, vcc -; GFX900-GISEL-NEXT: v_mul_f32_e32 v0, v0, v1 +; GFX900-GISEL-NEXT: v_cndmask_b32_e64 v1, 0, 1, vcc +; GFX900-GISEL-NEXT: v_lshlrev_b32_e32 v1, 5, v1 +; GFX900-GISEL-NEXT: v_ldexp_f32 v0, v0, v1 ; GFX900-GISEL-NEXT: v_log_f32_e32 v0, v0 ; GFX900-GISEL-NEXT: v_mov_b32_e32 v1, 0x3e9a209a ; GFX900-GISEL-NEXT: v_mov_b32_e32 v3, 0x3284fbcf @@ -4868,21 +4917,22 @@ define float @v_log10_f32_ninf_dynamic(float %in) #1 { ; GFX1100-GISEL: ; %bb.0: ; GFX1100-GISEL-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) ; 
GFX1100-GISEL-NEXT: v_cmp_gt_f32_e32 vcc_lo, 0x800000, v0 -; GFX1100-GISEL-NEXT: v_cndmask_b32_e64 v1, 1.0, 0x4f800000, vcc_lo +; GFX1100-GISEL-NEXT: v_cndmask_b32_e64 v1, 0, 1, vcc_lo ; GFX1100-GISEL-NEXT: s_delay_alu instid0(VALU_DEP_1) | instskip(NEXT) | instid1(VALU_DEP_1) -; GFX1100-GISEL-NEXT: v_mul_f32_e32 v0, v0, v1 +; GFX1100-GISEL-NEXT: v_lshlrev_b32_e32 v1, 5, v1 +; GFX1100-GISEL-NEXT: v_ldexp_f32 v0, v0, v1 +; GFX1100-GISEL-NEXT: s_delay_alu instid0(VALU_DEP_1) | instskip(SKIP_3) | instid1(VALU_DEP_2) ; GFX1100-GISEL-NEXT: v_log_f32_e32 v0, v0 ; GFX1100-GISEL-NEXT: s_waitcnt_depctr 0xfff ; GFX1100-GISEL-NEXT: v_mul_f32_e32 v1, 0x3e9a209a, v0 ; GFX1100-GISEL-NEXT: v_cmp_gt_f32_e64 s0, 0x7f800000, |v0| -; GFX1100-GISEL-NEXT: s_delay_alu instid0(VALU_DEP_2) | instskip(NEXT) | instid1(VALU_DEP_1) ; GFX1100-GISEL-NEXT: v_fma_f32 v2, 0x3e9a209a, v0, -v1 -; GFX1100-GISEL-NEXT: v_fmac_f32_e32 v2, 0x3284fbcf, v0 ; GFX1100-GISEL-NEXT: s_delay_alu instid0(VALU_DEP_1) | instskip(NEXT) | instid1(VALU_DEP_1) +; GFX1100-GISEL-NEXT: v_fmac_f32_e32 v2, 0x3284fbcf, v0 ; GFX1100-GISEL-NEXT: v_add_f32_e32 v1, v1, v2 +; GFX1100-GISEL-NEXT: s_delay_alu instid0(VALU_DEP_1) | instskip(SKIP_1) | instid1(VALU_DEP_1) ; GFX1100-GISEL-NEXT: v_cndmask_b32_e64 v0, v0, v1, s0 ; GFX1100-GISEL-NEXT: v_cndmask_b32_e64 v1, 0, 0x411a209b, vcc_lo -; GFX1100-GISEL-NEXT: s_delay_alu instid0(VALU_DEP_1) ; GFX1100-GISEL-NEXT: v_sub_f32_e32 v0, v0, v1 ; GFX1100-GISEL-NEXT: s_setpc_b64 s[30:31] ; @@ -4924,10 +4974,10 @@ define float @v_log10_f32_nnan_ninf(float %in) { ; SI-GISEL: ; %bb.0: ; SI-GISEL-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) ; SI-GISEL-NEXT: v_mov_b32_e32 v1, 0x800000 -; SI-GISEL-NEXT: v_mov_b32_e32 v2, 0x4f800000 ; SI-GISEL-NEXT: v_cmp_lt_f32_e32 vcc, v0, v1 -; SI-GISEL-NEXT: v_cndmask_b32_e32 v1, 1.0, v2, vcc -; SI-GISEL-NEXT: v_mul_f32_e32 v0, v0, v1 +; SI-GISEL-NEXT: v_cndmask_b32_e64 v1, 0, 1, vcc +; SI-GISEL-NEXT: v_lshlrev_b32_e32 v1, 5, v1 +; SI-GISEL-NEXT: 
v_ldexp_f32_e32 v0, v0, v1 ; SI-GISEL-NEXT: v_log_f32_e32 v0, v0 ; SI-GISEL-NEXT: v_mov_b32_e32 v1, 0x3e9a209a ; SI-GISEL-NEXT: v_mov_b32_e32 v3, 0x3284fbcf @@ -4967,16 +5017,16 @@ define float @v_log10_f32_nnan_ninf(float %in) { ; VI-GISEL: ; %bb.0: ; VI-GISEL-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) ; VI-GISEL-NEXT: v_mov_b32_e32 v1, 0x800000 -; VI-GISEL-NEXT: v_mov_b32_e32 v2, 0x4f800000 ; VI-GISEL-NEXT: v_cmp_lt_f32_e32 vcc, v0, v1 -; VI-GISEL-NEXT: v_cndmask_b32_e32 v1, 1.0, v2, vcc -; VI-GISEL-NEXT: v_mul_f32_e32 v0, v0, v1 +; VI-GISEL-NEXT: v_cndmask_b32_e64 v1, 0, 1, vcc +; VI-GISEL-NEXT: v_lshlrev_b32_e32 v1, 5, v1 +; VI-GISEL-NEXT: v_ldexp_f32 v0, v0, v1 ; VI-GISEL-NEXT: v_log_f32_e32 v0, v0 ; VI-GISEL-NEXT: v_and_b32_e32 v1, 0xfffff000, v0 ; VI-GISEL-NEXT: v_sub_f32_e32 v0, v0, v1 -; VI-GISEL-NEXT: v_mul_f32_e32 v2, 0x369a84fb, v0 -; VI-GISEL-NEXT: v_mul_f32_e32 v3, 0x369a84fb, v1 -; VI-GISEL-NEXT: v_add_f32_e32 v2, v3, v2 +; VI-GISEL-NEXT: v_mul_f32_e32 v2, 0x369a84fb, v1 +; VI-GISEL-NEXT: v_mul_f32_e32 v3, 0x369a84fb, v0 +; VI-GISEL-NEXT: v_add_f32_e32 v2, v2, v3 ; VI-GISEL-NEXT: v_mul_f32_e32 v0, 0x3e9a2000, v0 ; VI-GISEL-NEXT: v_add_f32_e32 v0, v0, v2 ; VI-GISEL-NEXT: v_mul_f32_e32 v1, 0x3e9a2000, v1 @@ -5010,10 +5060,10 @@ define float @v_log10_f32_nnan_ninf(float %in) { ; GFX900-GISEL: ; %bb.0: ; GFX900-GISEL-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) ; GFX900-GISEL-NEXT: v_mov_b32_e32 v1, 0x800000 -; GFX900-GISEL-NEXT: v_mov_b32_e32 v2, 0x4f800000 ; GFX900-GISEL-NEXT: v_cmp_lt_f32_e32 vcc, v0, v1 -; GFX900-GISEL-NEXT: v_cndmask_b32_e32 v1, 1.0, v2, vcc -; GFX900-GISEL-NEXT: v_mul_f32_e32 v0, v0, v1 +; GFX900-GISEL-NEXT: v_cndmask_b32_e64 v1, 0, 1, vcc +; GFX900-GISEL-NEXT: v_lshlrev_b32_e32 v1, 5, v1 +; GFX900-GISEL-NEXT: v_ldexp_f32 v0, v0, v1 ; GFX900-GISEL-NEXT: v_log_f32_e32 v0, v0 ; GFX900-GISEL-NEXT: v_mov_b32_e32 v1, 0x3e9a209a ; GFX900-GISEL-NEXT: v_mov_b32_e32 v3, 0x3284fbcf @@ -5051,18 +5101,20 @@ define float 
@v_log10_f32_nnan_ninf(float %in) { ; GFX1100-GISEL: ; %bb.0: ; GFX1100-GISEL-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) ; GFX1100-GISEL-NEXT: v_cmp_gt_f32_e32 vcc_lo, 0x800000, v0 -; GFX1100-GISEL-NEXT: v_cndmask_b32_e64 v1, 1.0, 0x4f800000, vcc_lo +; GFX1100-GISEL-NEXT: v_cndmask_b32_e64 v1, 0, 1, vcc_lo ; GFX1100-GISEL-NEXT: s_delay_alu instid0(VALU_DEP_1) | instskip(NEXT) | instid1(VALU_DEP_1) -; GFX1100-GISEL-NEXT: v_mul_f32_e32 v0, v0, v1 +; GFX1100-GISEL-NEXT: v_lshlrev_b32_e32 v1, 5, v1 +; GFX1100-GISEL-NEXT: v_ldexp_f32 v0, v0, v1 +; GFX1100-GISEL-NEXT: s_delay_alu instid0(VALU_DEP_1) | instskip(SKIP_2) | instid1(VALU_DEP_1) ; GFX1100-GISEL-NEXT: v_log_f32_e32 v0, v0 ; GFX1100-GISEL-NEXT: s_waitcnt_depctr 0xfff ; GFX1100-GISEL-NEXT: v_mul_f32_e32 v1, 0x3e9a209a, v0 -; GFX1100-GISEL-NEXT: s_delay_alu instid0(VALU_DEP_1) | instskip(NEXT) | instid1(VALU_DEP_1) ; GFX1100-GISEL-NEXT: v_fma_f32 v2, 0x3e9a209a, v0, -v1 +; GFX1100-GISEL-NEXT: s_delay_alu instid0(VALU_DEP_1) | instskip(NEXT) | instid1(VALU_DEP_1) ; GFX1100-GISEL-NEXT: v_fmac_f32_e32 v2, 0x3284fbcf, v0 -; GFX1100-GISEL-NEXT: s_delay_alu instid0(VALU_DEP_1) | instskip(SKIP_1) | instid1(VALU_DEP_1) ; GFX1100-GISEL-NEXT: v_add_f32_e32 v0, v1, v2 ; GFX1100-GISEL-NEXT: v_cndmask_b32_e64 v1, 0, 0x411a209b, vcc_lo +; GFX1100-GISEL-NEXT: s_delay_alu instid0(VALU_DEP_1) ; GFX1100-GISEL-NEXT: v_sub_f32_e32 v0, v0, v1 ; GFX1100-GISEL-NEXT: s_setpc_b64 s[30:31] ; @@ -5207,10 +5259,10 @@ define float @v_log10_f32_nnan_ninf_dynamic(float %in) #1 { ; SI-GISEL: ; %bb.0: ; SI-GISEL-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) ; SI-GISEL-NEXT: v_mov_b32_e32 v1, 0x800000 -; SI-GISEL-NEXT: v_mov_b32_e32 v2, 0x4f800000 ; SI-GISEL-NEXT: v_cmp_lt_f32_e32 vcc, v0, v1 -; SI-GISEL-NEXT: v_cndmask_b32_e32 v1, 1.0, v2, vcc -; SI-GISEL-NEXT: v_mul_f32_e32 v0, v0, v1 +; SI-GISEL-NEXT: v_cndmask_b32_e64 v1, 0, 1, vcc +; SI-GISEL-NEXT: v_lshlrev_b32_e32 v1, 5, v1 +; SI-GISEL-NEXT: v_ldexp_f32_e32 v0, v0, v1 ; SI-GISEL-NEXT: 
v_log_f32_e32 v0, v0 ; SI-GISEL-NEXT: v_mov_b32_e32 v1, 0x3e9a209a ; SI-GISEL-NEXT: v_mov_b32_e32 v3, 0x3284fbcf @@ -5250,16 +5302,16 @@ define float @v_log10_f32_nnan_ninf_dynamic(float %in) #1 { ; VI-GISEL: ; %bb.0: ; VI-GISEL-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) ; VI-GISEL-NEXT: v_mov_b32_e32 v1, 0x800000 -; VI-GISEL-NEXT: v_mov_b32_e32 v2, 0x4f800000 ; VI-GISEL-NEXT: v_cmp_lt_f32_e32 vcc, v0, v1 -; VI-GISEL-NEXT: v_cndmask_b32_e32 v1, 1.0, v2, vcc -; VI-GISEL-NEXT: v_mul_f32_e32 v0, v0, v1 +; VI-GISEL-NEXT: v_cndmask_b32_e64 v1, 0, 1, vcc +; VI-GISEL-NEXT: v_lshlrev_b32_e32 v1, 5, v1 +; VI-GISEL-NEXT: v_ldexp_f32 v0, v0, v1 ; VI-GISEL-NEXT: v_log_f32_e32 v0, v0 ; VI-GISEL-NEXT: v_and_b32_e32 v1, 0xfffff000, v0 ; VI-GISEL-NEXT: v_sub_f32_e32 v0, v0, v1 -; VI-GISEL-NEXT: v_mul_f32_e32 v2, 0x369a84fb, v0 -; VI-GISEL-NEXT: v_mul_f32_e32 v3, 0x369a84fb, v1 -; VI-GISEL-NEXT: v_add_f32_e32 v2, v3, v2 +; VI-GISEL-NEXT: v_mul_f32_e32 v2, 0x369a84fb, v1 +; VI-GISEL-NEXT: v_mul_f32_e32 v3, 0x369a84fb, v0 +; VI-GISEL-NEXT: v_add_f32_e32 v2, v2, v3 ; VI-GISEL-NEXT: v_mul_f32_e32 v0, 0x3e9a2000, v0 ; VI-GISEL-NEXT: v_add_f32_e32 v0, v0, v2 ; VI-GISEL-NEXT: v_mul_f32_e32 v1, 0x3e9a2000, v1 @@ -5293,10 +5345,10 @@ define float @v_log10_f32_nnan_ninf_dynamic(float %in) #1 { ; GFX900-GISEL: ; %bb.0: ; GFX900-GISEL-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) ; GFX900-GISEL-NEXT: v_mov_b32_e32 v1, 0x800000 -; GFX900-GISEL-NEXT: v_mov_b32_e32 v2, 0x4f800000 ; GFX900-GISEL-NEXT: v_cmp_lt_f32_e32 vcc, v0, v1 -; GFX900-GISEL-NEXT: v_cndmask_b32_e32 v1, 1.0, v2, vcc -; GFX900-GISEL-NEXT: v_mul_f32_e32 v0, v0, v1 +; GFX900-GISEL-NEXT: v_cndmask_b32_e64 v1, 0, 1, vcc +; GFX900-GISEL-NEXT: v_lshlrev_b32_e32 v1, 5, v1 +; GFX900-GISEL-NEXT: v_ldexp_f32 v0, v0, v1 ; GFX900-GISEL-NEXT: v_log_f32_e32 v0, v0 ; GFX900-GISEL-NEXT: v_mov_b32_e32 v1, 0x3e9a209a ; GFX900-GISEL-NEXT: v_mov_b32_e32 v3, 0x3284fbcf @@ -5334,18 +5386,20 @@ define float @v_log10_f32_nnan_ninf_dynamic(float %in) 
#1 { ; GFX1100-GISEL: ; %bb.0: ; GFX1100-GISEL-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) ; GFX1100-GISEL-NEXT: v_cmp_gt_f32_e32 vcc_lo, 0x800000, v0 -; GFX1100-GISEL-NEXT: v_cndmask_b32_e64 v1, 1.0, 0x4f800000, vcc_lo +; GFX1100-GISEL-NEXT: v_cndmask_b32_e64 v1, 0, 1, vcc_lo ; GFX1100-GISEL-NEXT: s_delay_alu instid0(VALU_DEP_1) | instskip(NEXT) | instid1(VALU_DEP_1) -; GFX1100-GISEL-NEXT: v_mul_f32_e32 v0, v0, v1 +; GFX1100-GISEL-NEXT: v_lshlrev_b32_e32 v1, 5, v1 +; GFX1100-GISEL-NEXT: v_ldexp_f32 v0, v0, v1 +; GFX1100-GISEL-NEXT: s_delay_alu instid0(VALU_DEP_1) | instskip(SKIP_2) | instid1(VALU_DEP_1) ; GFX1100-GISEL-NEXT: v_log_f32_e32 v0, v0 ; GFX1100-GISEL-NEXT: s_waitcnt_depctr 0xfff ; GFX1100-GISEL-NEXT: v_mul_f32_e32 v1, 0x3e9a209a, v0 -; GFX1100-GISEL-NEXT: s_delay_alu instid0(VALU_DEP_1) | instskip(NEXT) | instid1(VALU_DEP_1) ; GFX1100-GISEL-NEXT: v_fma_f32 v2, 0x3e9a209a, v0, -v1 +; GFX1100-GISEL-NEXT: s_delay_alu instid0(VALU_DEP_1) | instskip(NEXT) | instid1(VALU_DEP_1) ; GFX1100-GISEL-NEXT: v_fmac_f32_e32 v2, 0x3284fbcf, v0 -; GFX1100-GISEL-NEXT: s_delay_alu instid0(VALU_DEP_1) | instskip(SKIP_1) | instid1(VALU_DEP_1) ; GFX1100-GISEL-NEXT: v_add_f32_e32 v0, v1, v2 ; GFX1100-GISEL-NEXT: v_cndmask_b32_e64 v1, 0, 0x411a209b, vcc_lo +; GFX1100-GISEL-NEXT: s_delay_alu instid0(VALU_DEP_1) ; GFX1100-GISEL-NEXT: v_sub_f32_e32 v0, v0, v1 ; GFX1100-GISEL-NEXT: s_setpc_b64 s[30:31] ; @@ -5419,10 +5473,10 @@ define float @v_log10_f32_dynamic_mode(float %in) #1 { ; SI-GISEL: ; %bb.0: ; SI-GISEL-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) ; SI-GISEL-NEXT: v_mov_b32_e32 v1, 0x800000 -; SI-GISEL-NEXT: v_mov_b32_e32 v2, 0x4f800000 ; SI-GISEL-NEXT: v_cmp_lt_f32_e32 vcc, v0, v1 -; SI-GISEL-NEXT: v_cndmask_b32_e32 v1, 1.0, v2, vcc -; SI-GISEL-NEXT: v_mul_f32_e32 v0, v0, v1 +; SI-GISEL-NEXT: v_cndmask_b32_e64 v1, 0, 1, vcc +; SI-GISEL-NEXT: v_lshlrev_b32_e32 v1, 5, v1 +; SI-GISEL-NEXT: v_ldexp_f32_e32 v0, v0, v1 ; SI-GISEL-NEXT: v_log_f32_e32 v0, v0 ; SI-GISEL-NEXT: 
v_mov_b32_e32 v1, 0x3e9a209a ; SI-GISEL-NEXT: v_mov_b32_e32 v3, 0x3284fbcf @@ -5468,16 +5522,16 @@ define float @v_log10_f32_dynamic_mode(float %in) #1 { ; VI-GISEL: ; %bb.0: ; VI-GISEL-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) ; VI-GISEL-NEXT: v_mov_b32_e32 v1, 0x800000 -; VI-GISEL-NEXT: v_mov_b32_e32 v2, 0x4f800000 ; VI-GISEL-NEXT: v_cmp_lt_f32_e32 vcc, v0, v1 -; VI-GISEL-NEXT: v_cndmask_b32_e32 v1, 1.0, v2, vcc -; VI-GISEL-NEXT: v_mul_f32_e32 v0, v0, v1 +; VI-GISEL-NEXT: v_cndmask_b32_e64 v1, 0, 1, vcc +; VI-GISEL-NEXT: v_lshlrev_b32_e32 v1, 5, v1 +; VI-GISEL-NEXT: v_ldexp_f32 v0, v0, v1 ; VI-GISEL-NEXT: v_log_f32_e32 v0, v0 ; VI-GISEL-NEXT: v_and_b32_e32 v1, 0xfffff000, v0 ; VI-GISEL-NEXT: v_sub_f32_e32 v2, v0, v1 -; VI-GISEL-NEXT: v_mul_f32_e32 v3, 0x369a84fb, v2 -; VI-GISEL-NEXT: v_mul_f32_e32 v4, 0x369a84fb, v1 -; VI-GISEL-NEXT: v_add_f32_e32 v3, v4, v3 +; VI-GISEL-NEXT: v_mul_f32_e32 v3, 0x369a84fb, v1 +; VI-GISEL-NEXT: v_mul_f32_e32 v4, 0x369a84fb, v2 +; VI-GISEL-NEXT: v_add_f32_e32 v3, v3, v4 ; VI-GISEL-NEXT: v_mul_f32_e32 v2, 0x3e9a2000, v2 ; VI-GISEL-NEXT: v_add_f32_e32 v2, v2, v3 ; VI-GISEL-NEXT: v_mul_f32_e32 v1, 0x3e9a2000, v1 @@ -5517,10 +5571,10 @@ define float @v_log10_f32_dynamic_mode(float %in) #1 { ; GFX900-GISEL: ; %bb.0: ; GFX900-GISEL-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) ; GFX900-GISEL-NEXT: v_mov_b32_e32 v1, 0x800000 -; GFX900-GISEL-NEXT: v_mov_b32_e32 v2, 0x4f800000 ; GFX900-GISEL-NEXT: v_cmp_lt_f32_e32 vcc, v0, v1 -; GFX900-GISEL-NEXT: v_cndmask_b32_e32 v1, 1.0, v2, vcc -; GFX900-GISEL-NEXT: v_mul_f32_e32 v0, v0, v1 +; GFX900-GISEL-NEXT: v_cndmask_b32_e64 v1, 0, 1, vcc +; GFX900-GISEL-NEXT: v_lshlrev_b32_e32 v1, 5, v1 +; GFX900-GISEL-NEXT: v_ldexp_f32 v0, v0, v1 ; GFX900-GISEL-NEXT: v_log_f32_e32 v0, v0 ; GFX900-GISEL-NEXT: v_mov_b32_e32 v1, 0x3e9a209a ; GFX900-GISEL-NEXT: v_mov_b32_e32 v3, 0x3284fbcf @@ -5563,21 +5617,22 @@ define float @v_log10_f32_dynamic_mode(float %in) #1 { ; GFX1100-GISEL: ; %bb.0: ; GFX1100-GISEL-NEXT: 
s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) ; GFX1100-GISEL-NEXT: v_cmp_gt_f32_e32 vcc_lo, 0x800000, v0 -; GFX1100-GISEL-NEXT: v_cndmask_b32_e64 v1, 1.0, 0x4f800000, vcc_lo +; GFX1100-GISEL-NEXT: v_cndmask_b32_e64 v1, 0, 1, vcc_lo ; GFX1100-GISEL-NEXT: s_delay_alu instid0(VALU_DEP_1) | instskip(NEXT) | instid1(VALU_DEP_1) -; GFX1100-GISEL-NEXT: v_mul_f32_e32 v0, v0, v1 +; GFX1100-GISEL-NEXT: v_lshlrev_b32_e32 v1, 5, v1 +; GFX1100-GISEL-NEXT: v_ldexp_f32 v0, v0, v1 +; GFX1100-GISEL-NEXT: s_delay_alu instid0(VALU_DEP_1) | instskip(SKIP_3) | instid1(VALU_DEP_2) ; GFX1100-GISEL-NEXT: v_log_f32_e32 v0, v0 ; GFX1100-GISEL-NEXT: s_waitcnt_depctr 0xfff ; GFX1100-GISEL-NEXT: v_mul_f32_e32 v1, 0x3e9a209a, v0 ; GFX1100-GISEL-NEXT: v_cmp_gt_f32_e64 s0, 0x7f800000, |v0| -; GFX1100-GISEL-NEXT: s_delay_alu instid0(VALU_DEP_2) | instskip(NEXT) | instid1(VALU_DEP_1) ; GFX1100-GISEL-NEXT: v_fma_f32 v2, 0x3e9a209a, v0, -v1 -; GFX1100-GISEL-NEXT: v_fmac_f32_e32 v2, 0x3284fbcf, v0 ; GFX1100-GISEL-NEXT: s_delay_alu instid0(VALU_DEP_1) | instskip(NEXT) | instid1(VALU_DEP_1) +; GFX1100-GISEL-NEXT: v_fmac_f32_e32 v2, 0x3284fbcf, v0 ; GFX1100-GISEL-NEXT: v_add_f32_e32 v1, v1, v2 +; GFX1100-GISEL-NEXT: s_delay_alu instid0(VALU_DEP_1) | instskip(SKIP_1) | instid1(VALU_DEP_1) ; GFX1100-GISEL-NEXT: v_cndmask_b32_e64 v0, v0, v1, s0 ; GFX1100-GISEL-NEXT: v_cndmask_b32_e64 v1, 0, 0x411a209b, vcc_lo -; GFX1100-GISEL-NEXT: s_delay_alu instid0(VALU_DEP_1) ; GFX1100-GISEL-NEXT: v_sub_f32_e32 v0, v0, v1 ; GFX1100-GISEL-NEXT: s_setpc_b64 s[30:31] ; diff --git a/llvm/test/CodeGen/AMDGPU/llvm.log2.ll b/llvm/test/CodeGen/AMDGPU/llvm.log2.ll index 2c5a9f58a199e..8b3b79b0b1bdd 100644 --- a/llvm/test/CodeGen/AMDGPU/llvm.log2.ll +++ b/llvm/test/CodeGen/AMDGPU/llvm.log2.ll @@ -36,14 +36,14 @@ define amdgpu_kernel void @s_log2_f32(ptr addrspace(1) %out, float %in) { ; SI-GISEL-NEXT: s_load_dword s2, s[4:5], 0xb ; SI-GISEL-NEXT: s_load_dwordx2 s[0:1], s[4:5], 0x9 ; SI-GISEL-NEXT: v_mov_b32_e32 v0, 0x800000 -; 
SI-GISEL-NEXT: v_mov_b32_e32 v1, 0x4f800000 +; SI-GISEL-NEXT: v_mov_b32_e32 v1, 0x42000000 ; SI-GISEL-NEXT: s_mov_b32 s3, 0xf000 ; SI-GISEL-NEXT: s_waitcnt lgkmcnt(0) ; SI-GISEL-NEXT: v_cmp_lt_f32_e32 vcc, s2, v0 -; SI-GISEL-NEXT: v_cndmask_b32_e32 v0, 1.0, v1, vcc -; SI-GISEL-NEXT: v_mul_f32_e32 v0, s2, v0 +; SI-GISEL-NEXT: v_cndmask_b32_e64 v0, 0, 1, vcc +; SI-GISEL-NEXT: v_lshlrev_b32_e32 v0, 5, v0 +; SI-GISEL-NEXT: v_ldexp_f32_e32 v0, s2, v0 ; SI-GISEL-NEXT: v_log_f32_e32 v0, v0 -; SI-GISEL-NEXT: v_mov_b32_e32 v1, 0x42000000 ; SI-GISEL-NEXT: v_cndmask_b32_e32 v1, 0, v1, vcc ; SI-GISEL-NEXT: s_mov_b32 s2, -1 ; SI-GISEL-NEXT: v_sub_f32_e32 v0, v0, v1 @@ -74,13 +74,13 @@ define amdgpu_kernel void @s_log2_f32(ptr addrspace(1) %out, float %in) { ; VI-GISEL-NEXT: s_load_dword s2, s[4:5], 0x2c ; VI-GISEL-NEXT: s_load_dwordx2 s[0:1], s[4:5], 0x24 ; VI-GISEL-NEXT: v_mov_b32_e32 v0, 0x800000 -; VI-GISEL-NEXT: v_mov_b32_e32 v1, 0x4f800000 +; VI-GISEL-NEXT: v_mov_b32_e32 v1, 0x42000000 ; VI-GISEL-NEXT: s_waitcnt lgkmcnt(0) ; VI-GISEL-NEXT: v_cmp_lt_f32_e32 vcc, s2, v0 -; VI-GISEL-NEXT: v_cndmask_b32_e32 v0, 1.0, v1, vcc -; VI-GISEL-NEXT: v_mul_f32_e32 v0, s2, v0 +; VI-GISEL-NEXT: v_cndmask_b32_e64 v0, 0, 1, vcc +; VI-GISEL-NEXT: v_lshlrev_b32_e32 v0, 5, v0 +; VI-GISEL-NEXT: v_ldexp_f32 v0, s2, v0 ; VI-GISEL-NEXT: v_log_f32_e32 v0, v0 -; VI-GISEL-NEXT: v_mov_b32_e32 v1, 0x42000000 ; VI-GISEL-NEXT: v_cndmask_b32_e32 v1, 0, v1, vcc ; VI-GISEL-NEXT: v_sub_f32_e32 v2, v0, v1 ; VI-GISEL-NEXT: v_mov_b32_e32 v0, s0 @@ -108,20 +108,19 @@ define amdgpu_kernel void @s_log2_f32(ptr addrspace(1) %out, float %in) { ; ; GFX900-GISEL-LABEL: s_log2_f32: ; GFX900-GISEL: ; %bb.0: -; GFX900-GISEL-NEXT: s_load_dword s0, s[4:5], 0x2c -; GFX900-GISEL-NEXT: v_mov_b32_e32 v0, 0x800000 -; GFX900-GISEL-NEXT: v_mov_b32_e32 v1, 0x4f800000 -; GFX900-GISEL-NEXT: s_waitcnt lgkmcnt(0) -; GFX900-GISEL-NEXT: v_cmp_lt_f32_e32 vcc, s0, v0 -; GFX900-GISEL-NEXT: v_cndmask_b32_e32 v0, 1.0, v1, vcc -; 
GFX900-GISEL-NEXT: v_mul_f32_e32 v0, s0, v0 -; GFX900-GISEL-NEXT: v_log_f32_e32 v0, v0 +; GFX900-GISEL-NEXT: s_load_dword s2, s[4:5], 0x2c ; GFX900-GISEL-NEXT: s_load_dwordx2 s[0:1], s[4:5], 0x24 -; GFX900-GISEL-NEXT: v_mov_b32_e32 v1, 0x42000000 -; GFX900-GISEL-NEXT: v_cndmask_b32_e32 v1, 0, v1, vcc -; GFX900-GISEL-NEXT: v_sub_f32_e32 v0, v0, v1 +; GFX900-GISEL-NEXT: v_mov_b32_e32 v0, 0x800000 +; GFX900-GISEL-NEXT: v_mov_b32_e32 v2, 0x42000000 ; GFX900-GISEL-NEXT: v_mov_b32_e32 v1, 0 ; GFX900-GISEL-NEXT: s_waitcnt lgkmcnt(0) +; GFX900-GISEL-NEXT: v_cmp_lt_f32_e32 vcc, s2, v0 +; GFX900-GISEL-NEXT: v_cndmask_b32_e64 v0, 0, 1, vcc +; GFX900-GISEL-NEXT: v_lshlrev_b32_e32 v0, 5, v0 +; GFX900-GISEL-NEXT: v_ldexp_f32 v0, s2, v0 +; GFX900-GISEL-NEXT: v_log_f32_e32 v0, v0 +; GFX900-GISEL-NEXT: v_cndmask_b32_e32 v2, 0, v2, vcc +; GFX900-GISEL-NEXT: v_sub_f32_e32 v0, v0, v2 ; GFX900-GISEL-NEXT: global_store_dword v1, v0, s[0:1] ; GFX900-GISEL-NEXT: s_endpgm ; @@ -147,20 +146,22 @@ define amdgpu_kernel void @s_log2_f32(ptr addrspace(1) %out, float %in) { ; ; GFX1100-GISEL-LABEL: s_log2_f32: ; GFX1100-GISEL: ; %bb.0: -; GFX1100-GISEL-NEXT: s_load_b32 s0, s[4:5], 0x2c +; GFX1100-GISEL-NEXT: s_clause 0x1 +; GFX1100-GISEL-NEXT: s_load_b32 s2, s[4:5], 0x2c +; GFX1100-GISEL-NEXT: s_load_b64 s[0:1], s[4:5], 0x24 +; GFX1100-GISEL-NEXT: v_mov_b32_e32 v2, 0 ; GFX1100-GISEL-NEXT: s_waitcnt lgkmcnt(0) -; GFX1100-GISEL-NEXT: v_cmp_gt_f32_e64 s2, 0x800000, s0 +; GFX1100-GISEL-NEXT: v_cmp_gt_f32_e64 s3, 0x800000, s2 ; GFX1100-GISEL-NEXT: s_delay_alu instid0(VALU_DEP_1) | instskip(SKIP_1) | instid1(VALU_DEP_2) -; GFX1100-GISEL-NEXT: v_cndmask_b32_e64 v0, 1.0, 0x4f800000, s2 -; GFX1100-GISEL-NEXT: v_cndmask_b32_e64 v1, 0, 0x42000000, s2 -; GFX1100-GISEL-NEXT: v_mul_f32_e32 v0, s0, v0 -; GFX1100-GISEL-NEXT: s_load_b64 s[0:1], s[4:5], 0x24 -; GFX1100-GISEL-NEXT: s_delay_alu instid0(VALU_DEP_1) +; GFX1100-GISEL-NEXT: v_cndmask_b32_e64 v0, 0, 1, s3 +; GFX1100-GISEL-NEXT: v_cndmask_b32_e64 v1, 0, 
0x42000000, s3 +; GFX1100-GISEL-NEXT: v_lshlrev_b32_e32 v0, 5, v0 +; GFX1100-GISEL-NEXT: s_delay_alu instid0(VALU_DEP_1) | instskip(NEXT) | instid1(VALU_DEP_1) +; GFX1100-GISEL-NEXT: v_ldexp_f32 v0, s2, v0 ; GFX1100-GISEL-NEXT: v_log_f32_e32 v0, v0 ; GFX1100-GISEL-NEXT: s_waitcnt_depctr 0xfff -; GFX1100-GISEL-NEXT: v_dual_sub_f32 v0, v0, v1 :: v_dual_mov_b32 v1, 0 -; GFX1100-GISEL-NEXT: s_waitcnt lgkmcnt(0) -; GFX1100-GISEL-NEXT: global_store_b32 v1, v0, s[0:1] +; GFX1100-GISEL-NEXT: v_sub_f32_e32 v0, v0, v1 +; GFX1100-GISEL-NEXT: global_store_b32 v2, v0, s[0:1] ; GFX1100-GISEL-NEXT: s_endpgm ; ; R600-LABEL: s_log2_f32: @@ -242,21 +243,22 @@ define amdgpu_kernel void @s_log2_v2f32(ptr addrspace(1) %out, <2 x float> %in) ; SI-GISEL: ; %bb.0: ; SI-GISEL-NEXT: s_load_dwordx4 s[4:7], s[4:5], 0x9 ; SI-GISEL-NEXT: v_mov_b32_e32 v0, 0x800000 -; SI-GISEL-NEXT: v_mov_b32_e32 v1, 0x4f800000 -; SI-GISEL-NEXT: v_mov_b32_e32 v2, 0x42000000 +; SI-GISEL-NEXT: v_mov_b32_e32 v1, 0x42000000 ; SI-GISEL-NEXT: s_waitcnt lgkmcnt(0) ; SI-GISEL-NEXT: v_cmp_lt_f32_e32 vcc, s6, v0 ; SI-GISEL-NEXT: v_cmp_lt_f32_e64 s[0:1], s7, v0 -; SI-GISEL-NEXT: v_cndmask_b32_e32 v3, 1.0, v1, vcc -; SI-GISEL-NEXT: v_cndmask_b32_e64 v0, 1.0, v1, s[0:1] -; SI-GISEL-NEXT: v_mul_f32_e32 v3, s6, v3 -; SI-GISEL-NEXT: v_mul_f32_e32 v0, s7, v0 -; SI-GISEL-NEXT: v_log_f32_e32 v3, v3 -; SI-GISEL-NEXT: v_log_f32_e32 v1, v0 -; SI-GISEL-NEXT: v_cndmask_b32_e32 v0, 0, v2, vcc -; SI-GISEL-NEXT: v_cndmask_b32_e64 v2, 0, v2, s[0:1] -; SI-GISEL-NEXT: v_sub_f32_e32 v0, v3, v0 -; SI-GISEL-NEXT: v_sub_f32_e32 v1, v1, v2 +; SI-GISEL-NEXT: v_cndmask_b32_e64 v2, 0, 1, vcc +; SI-GISEL-NEXT: v_cndmask_b32_e64 v0, 0, 1, s[0:1] +; SI-GISEL-NEXT: v_lshlrev_b32_e32 v2, 5, v2 +; SI-GISEL-NEXT: v_lshlrev_b32_e32 v0, 5, v0 +; SI-GISEL-NEXT: v_ldexp_f32_e32 v2, s6, v2 +; SI-GISEL-NEXT: v_ldexp_f32_e32 v0, s7, v0 +; SI-GISEL-NEXT: v_log_f32_e32 v2, v2 +; SI-GISEL-NEXT: v_log_f32_e32 v3, v0 +; SI-GISEL-NEXT: v_cndmask_b32_e32 v0, 0, v1, vcc 
+; SI-GISEL-NEXT: v_cndmask_b32_e64 v1, 0, v1, s[0:1] +; SI-GISEL-NEXT: v_sub_f32_e32 v0, v2, v0 +; SI-GISEL-NEXT: v_sub_f32_e32 v1, v3, v1 ; SI-GISEL-NEXT: s_mov_b32 s6, -1 ; SI-GISEL-NEXT: s_mov_b32 s7, 0xf000 ; SI-GISEL-NEXT: buffer_store_dwordx2 v[0:1], off, s[4:7], 0 @@ -291,21 +293,22 @@ define amdgpu_kernel void @s_log2_v2f32(ptr addrspace(1) %out, <2 x float> %in) ; VI-GISEL: ; %bb.0: ; VI-GISEL-NEXT: s_load_dwordx4 s[4:7], s[4:5], 0x24 ; VI-GISEL-NEXT: v_mov_b32_e32 v0, 0x800000 -; VI-GISEL-NEXT: v_mov_b32_e32 v1, 0x4f800000 -; VI-GISEL-NEXT: v_mov_b32_e32 v2, 0x42000000 +; VI-GISEL-NEXT: v_mov_b32_e32 v1, 0x42000000 ; VI-GISEL-NEXT: s_waitcnt lgkmcnt(0) ; VI-GISEL-NEXT: v_cmp_lt_f32_e32 vcc, s6, v0 ; VI-GISEL-NEXT: v_cmp_lt_f32_e64 s[0:1], s7, v0 -; VI-GISEL-NEXT: v_cndmask_b32_e32 v3, 1.0, v1, vcc -; VI-GISEL-NEXT: v_cndmask_b32_e64 v0, 1.0, v1, s[0:1] -; VI-GISEL-NEXT: v_mul_f32_e32 v3, s6, v3 -; VI-GISEL-NEXT: v_mul_f32_e32 v0, s7, v0 -; VI-GISEL-NEXT: v_log_f32_e32 v3, v3 -; VI-GISEL-NEXT: v_log_f32_e32 v1, v0 -; VI-GISEL-NEXT: v_cndmask_b32_e32 v0, 0, v2, vcc -; VI-GISEL-NEXT: v_cndmask_b32_e64 v2, 0, v2, s[0:1] -; VI-GISEL-NEXT: v_sub_f32_e32 v0, v3, v0 -; VI-GISEL-NEXT: v_sub_f32_e32 v1, v1, v2 +; VI-GISEL-NEXT: v_cndmask_b32_e64 v2, 0, 1, vcc +; VI-GISEL-NEXT: v_cndmask_b32_e64 v0, 0, 1, s[0:1] +; VI-GISEL-NEXT: v_lshlrev_b32_e32 v2, 5, v2 +; VI-GISEL-NEXT: v_lshlrev_b32_e32 v0, 5, v0 +; VI-GISEL-NEXT: v_ldexp_f32 v2, s6, v2 +; VI-GISEL-NEXT: v_ldexp_f32 v0, s7, v0 +; VI-GISEL-NEXT: v_log_f32_e32 v2, v2 +; VI-GISEL-NEXT: v_log_f32_e32 v3, v0 +; VI-GISEL-NEXT: v_cndmask_b32_e32 v0, 0, v1, vcc +; VI-GISEL-NEXT: v_cndmask_b32_e64 v1, 0, v1, s[0:1] +; VI-GISEL-NEXT: v_sub_f32_e32 v0, v2, v0 +; VI-GISEL-NEXT: v_sub_f32_e32 v1, v3, v1 ; VI-GISEL-NEXT: v_mov_b32_e32 v2, s4 ; VI-GISEL-NEXT: v_mov_b32_e32 v3, s5 ; VI-GISEL-NEXT: flat_store_dwordx2 v[2:3], v[0:1] @@ -339,22 +342,23 @@ define amdgpu_kernel void @s_log2_v2f32(ptr addrspace(1) %out, <2 x 
float> %in) ; GFX900-GISEL: ; %bb.0: ; GFX900-GISEL-NEXT: s_load_dwordx4 s[8:11], s[4:5], 0x24 ; GFX900-GISEL-NEXT: v_mov_b32_e32 v0, 0x800000 -; GFX900-GISEL-NEXT: v_mov_b32_e32 v1, 0x4f800000 -; GFX900-GISEL-NEXT: v_mov_b32_e32 v2, 0x42000000 +; GFX900-GISEL-NEXT: v_mov_b32_e32 v1, 0x42000000 +; GFX900-GISEL-NEXT: v_mov_b32_e32 v2, 0 ; GFX900-GISEL-NEXT: s_waitcnt lgkmcnt(0) ; GFX900-GISEL-NEXT: v_cmp_lt_f32_e32 vcc, s10, v0 ; GFX900-GISEL-NEXT: v_cmp_lt_f32_e64 s[0:1], s11, v0 -; GFX900-GISEL-NEXT: v_cndmask_b32_e32 v3, 1.0, v1, vcc -; GFX900-GISEL-NEXT: v_cndmask_b32_e64 v0, 1.0, v1, s[0:1] -; GFX900-GISEL-NEXT: v_mul_f32_e32 v3, s10, v3 -; GFX900-GISEL-NEXT: v_mul_f32_e32 v0, s11, v0 +; GFX900-GISEL-NEXT: v_cndmask_b32_e64 v3, 0, 1, vcc +; GFX900-GISEL-NEXT: v_cndmask_b32_e64 v0, 0, 1, s[0:1] +; GFX900-GISEL-NEXT: v_lshlrev_b32_e32 v3, 5, v3 +; GFX900-GISEL-NEXT: v_lshlrev_b32_e32 v0, 5, v0 +; GFX900-GISEL-NEXT: v_ldexp_f32 v3, s10, v3 +; GFX900-GISEL-NEXT: v_ldexp_f32 v0, s11, v0 ; GFX900-GISEL-NEXT: v_log_f32_e32 v3, v3 -; GFX900-GISEL-NEXT: v_log_f32_e32 v1, v0 -; GFX900-GISEL-NEXT: v_cndmask_b32_e32 v0, 0, v2, vcc -; GFX900-GISEL-NEXT: v_cndmask_b32_e64 v2, 0, v2, s[0:1] +; GFX900-GISEL-NEXT: v_log_f32_e32 v4, v0 +; GFX900-GISEL-NEXT: v_cndmask_b32_e32 v0, 0, v1, vcc +; GFX900-GISEL-NEXT: v_cndmask_b32_e64 v1, 0, v1, s[0:1] ; GFX900-GISEL-NEXT: v_sub_f32_e32 v0, v3, v0 -; GFX900-GISEL-NEXT: v_sub_f32_e32 v1, v1, v2 -; GFX900-GISEL-NEXT: v_mov_b32_e32 v2, 0 +; GFX900-GISEL-NEXT: v_sub_f32_e32 v1, v4, v1 ; GFX900-GISEL-NEXT: global_store_dwordx2 v2, v[0:1], s[8:9] ; GFX900-GISEL-NEXT: s_endpgm ; @@ -387,23 +391,28 @@ define amdgpu_kernel void @s_log2_v2f32(ptr addrspace(1) %out, <2 x float> %in) ; GFX1100-GISEL-LABEL: s_log2_v2f32: ; GFX1100-GISEL: ; %bb.0: ; GFX1100-GISEL-NEXT: s_load_b128 s[0:3], s[4:5], 0x24 +; GFX1100-GISEL-NEXT: v_mov_b32_e32 v4, 0 ; GFX1100-GISEL-NEXT: s_waitcnt lgkmcnt(0) -; GFX1100-GISEL-NEXT: v_cmp_gt_f32_e64 s4, 0x800000, s2 ; 
GFX1100-GISEL-NEXT: v_cmp_gt_f32_e64 s5, 0x800000, s3 +; GFX1100-GISEL-NEXT: v_cmp_gt_f32_e64 s4, 0x800000, s2 ; GFX1100-GISEL-NEXT: s_delay_alu instid0(VALU_DEP_2) | instskip(NEXT) | instid1(VALU_DEP_2) -; GFX1100-GISEL-NEXT: v_cndmask_b32_e64 v0, 1.0, 0x4f800000, s4 -; GFX1100-GISEL-NEXT: v_cndmask_b32_e64 v1, 1.0, 0x4f800000, s5 -; GFX1100-GISEL-NEXT: v_cndmask_b32_e64 v2, 0, 0x42000000, s4 +; GFX1100-GISEL-NEXT: v_cndmask_b32_e64 v1, 0, 1, s5 +; GFX1100-GISEL-NEXT: v_cndmask_b32_e64 v0, 0, 1, s4 ; GFX1100-GISEL-NEXT: v_cndmask_b32_e64 v3, 0, 0x42000000, s5 -; GFX1100-GISEL-NEXT: s_delay_alu instid0(VALU_DEP_3) | instskip(NEXT) | instid1(VALU_DEP_1) -; GFX1100-GISEL-NEXT: v_dual_mul_f32 v0, s2, v0 :: v_dual_mul_f32 v1, s3, v1 -; GFX1100-GISEL-NEXT: v_log_f32_e32 v0, v0 -; GFX1100-GISEL-NEXT: s_delay_alu instid0(VALU_DEP_1) +; GFX1100-GISEL-NEXT: v_cndmask_b32_e64 v2, 0, 0x42000000, s4 +; GFX1100-GISEL-NEXT: s_delay_alu instid0(VALU_DEP_4) | instskip(NEXT) | instid1(VALU_DEP_1) +; GFX1100-GISEL-NEXT: v_lshlrev_b32_e32 v1, 5, v1 +; GFX1100-GISEL-NEXT: v_ldexp_f32 v1, s3, v1 +; GFX1100-GISEL-NEXT: s_delay_alu instid0(VALU_DEP_1) | instskip(SKIP_2) | instid1(VALU_DEP_1) ; GFX1100-GISEL-NEXT: v_log_f32_e32 v1, v1 ; GFX1100-GISEL-NEXT: s_waitcnt_depctr 0xfff -; GFX1100-GISEL-NEXT: v_dual_sub_f32 v0, v0, v2 :: v_dual_sub_f32 v1, v1, v3 -; GFX1100-GISEL-NEXT: v_mov_b32_e32 v2, 0 -; GFX1100-GISEL-NEXT: global_store_b64 v2, v[0:1], s[0:1] +; GFX1100-GISEL-NEXT: v_dual_sub_f32 v1, v1, v3 :: v_dual_lshlrev_b32 v0, 5, v0 +; GFX1100-GISEL-NEXT: v_ldexp_f32 v0, s2, v0 +; GFX1100-GISEL-NEXT: s_delay_alu instid0(VALU_DEP_1) +; GFX1100-GISEL-NEXT: v_log_f32_e32 v0, v0 +; GFX1100-GISEL-NEXT: s_waitcnt_depctr 0xfff +; GFX1100-GISEL-NEXT: v_sub_f32_e32 v0, v0, v2 +; GFX1100-GISEL-NEXT: global_store_b64 v4, v[0:1], s[0:1] ; GFX1100-GISEL-NEXT: s_endpgm ; ; R600-LABEL: s_log2_v2f32: @@ -506,32 +515,34 @@ define amdgpu_kernel void @s_log2_v3f32(ptr addrspace(1) %out, <3 x float> %in) ; 
; SI-GISEL-LABEL: s_log2_v3f32: ; SI-GISEL: ; %bb.0: -; SI-GISEL-NEXT: s_load_dwordx4 s[0:3], s[4:5], 0xd -; SI-GISEL-NEXT: v_mov_b32_e32 v1, 0x800000 -; SI-GISEL-NEXT: v_mov_b32_e32 v2, 0x4f800000 -; SI-GISEL-NEXT: v_mov_b32_e32 v3, 0x42000000 +; SI-GISEL-NEXT: s_load_dwordx4 s[8:11], s[4:5], 0xd ; SI-GISEL-NEXT: s_load_dwordx2 s[4:5], s[4:5], 0x9 +; SI-GISEL-NEXT: v_mov_b32_e32 v1, 0x800000 +; SI-GISEL-NEXT: v_mov_b32_e32 v2, 0x42000000 +; SI-GISEL-NEXT: s_mov_b32 s6, -1 ; SI-GISEL-NEXT: s_waitcnt lgkmcnt(0) -; SI-GISEL-NEXT: v_cmp_lt_f32_e32 vcc, s0, v1 -; SI-GISEL-NEXT: v_cndmask_b32_e32 v0, 1.0, v2, vcc -; SI-GISEL-NEXT: v_mul_f32_e32 v0, s0, v0 +; SI-GISEL-NEXT: v_cmp_lt_f32_e32 vcc, s8, v1 +; SI-GISEL-NEXT: v_cndmask_b32_e64 v0, 0, 1, vcc +; SI-GISEL-NEXT: v_lshlrev_b32_e32 v0, 5, v0 +; SI-GISEL-NEXT: v_ldexp_f32_e32 v0, s8, v0 +; SI-GISEL-NEXT: v_cmp_lt_f32_e64 s[0:1], s9, v1 ; SI-GISEL-NEXT: v_log_f32_e32 v0, v0 -; SI-GISEL-NEXT: v_cndmask_b32_e32 v4, 0, v3, vcc -; SI-GISEL-NEXT: v_cmp_lt_f32_e32 vcc, s1, v1 -; SI-GISEL-NEXT: s_mov_b32 s6, -1 +; SI-GISEL-NEXT: v_cndmask_b32_e64 v3, 0, 1, s[0:1] +; SI-GISEL-NEXT: v_cndmask_b32_e32 v4, 0, v2, vcc +; SI-GISEL-NEXT: v_cmp_lt_f32_e32 vcc, s10, v1 +; SI-GISEL-NEXT: v_lshlrev_b32_e32 v3, 5, v3 +; SI-GISEL-NEXT: v_cndmask_b32_e64 v1, 0, 1, vcc +; SI-GISEL-NEXT: v_ldexp_f32_e32 v3, s9, v3 +; SI-GISEL-NEXT: v_lshlrev_b32_e32 v1, 5, v1 +; SI-GISEL-NEXT: v_log_f32_e32 v3, v3 +; SI-GISEL-NEXT: v_ldexp_f32_e32 v1, s10, v1 ; SI-GISEL-NEXT: v_sub_f32_e32 v0, v0, v4 -; SI-GISEL-NEXT: v_cndmask_b32_e32 v4, 1.0, v2, vcc -; SI-GISEL-NEXT: v_mul_f32_e32 v4, s1, v4 -; SI-GISEL-NEXT: v_cmp_lt_f32_e64 s[0:1], s2, v1 -; SI-GISEL-NEXT: v_cndmask_b32_e64 v1, 1.0, v2, s[0:1] -; SI-GISEL-NEXT: v_log_f32_e32 v4, v4 -; SI-GISEL-NEXT: v_mul_f32_e32 v1, s2, v1 -; SI-GISEL-NEXT: v_log_f32_e32 v2, v1 -; SI-GISEL-NEXT: v_cndmask_b32_e32 v1, 0, v3, vcc -; SI-GISEL-NEXT: v_sub_f32_e32 v1, v4, v1 -; SI-GISEL-NEXT: v_cndmask_b32_e64 v3, 0, v3, 
s[0:1] +; SI-GISEL-NEXT: v_log_f32_e32 v4, v1 +; SI-GISEL-NEXT: v_cndmask_b32_e64 v1, 0, v2, s[0:1] +; SI-GISEL-NEXT: v_sub_f32_e32 v1, v3, v1 +; SI-GISEL-NEXT: v_cndmask_b32_e32 v2, 0, v2, vcc ; SI-GISEL-NEXT: s_mov_b32 s7, 0xf000 -; SI-GISEL-NEXT: v_sub_f32_e32 v2, v2, v3 +; SI-GISEL-NEXT: v_sub_f32_e32 v2, v4, v2 ; SI-GISEL-NEXT: buffer_store_dwordx2 v[0:1], off, s[4:7], 0 ; SI-GISEL-NEXT: buffer_store_dword v2, off, s[4:7], 0 offset:8 ; SI-GISEL-NEXT: s_endpgm @@ -571,32 +582,34 @@ define amdgpu_kernel void @s_log2_v3f32(ptr addrspace(1) %out, <3 x float> %in) ; ; VI-GISEL-LABEL: s_log2_v3f32: ; VI-GISEL: ; %bb.0: -; VI-GISEL-NEXT: s_load_dwordx4 s[0:3], s[4:5], 0x34 +; VI-GISEL-NEXT: s_load_dwordx4 s[8:11], s[4:5], 0x34 +; VI-GISEL-NEXT: s_load_dwordx2 s[2:3], s[4:5], 0x24 ; VI-GISEL-NEXT: v_mov_b32_e32 v1, 0x800000 -; VI-GISEL-NEXT: v_mov_b32_e32 v2, 0x4f800000 -; VI-GISEL-NEXT: v_mov_b32_e32 v3, 0x42000000 -; VI-GISEL-NEXT: s_load_dwordx2 s[4:5], s[4:5], 0x24 +; VI-GISEL-NEXT: v_mov_b32_e32 v2, 0x42000000 ; VI-GISEL-NEXT: s_waitcnt lgkmcnt(0) -; VI-GISEL-NEXT: v_cmp_lt_f32_e32 vcc, s0, v1 -; VI-GISEL-NEXT: v_cndmask_b32_e32 v0, 1.0, v2, vcc -; VI-GISEL-NEXT: v_mul_f32_e32 v0, s0, v0 +; VI-GISEL-NEXT: v_cmp_lt_f32_e32 vcc, s8, v1 +; VI-GISEL-NEXT: v_cndmask_b32_e64 v0, 0, 1, vcc +; VI-GISEL-NEXT: v_lshlrev_b32_e32 v0, 5, v0 +; VI-GISEL-NEXT: v_ldexp_f32 v0, s8, v0 ; VI-GISEL-NEXT: v_log_f32_e32 v0, v0 -; VI-GISEL-NEXT: v_cndmask_b32_e32 v4, 0, v3, vcc -; VI-GISEL-NEXT: v_cmp_lt_f32_e32 vcc, s1, v1 +; VI-GISEL-NEXT: v_cmp_lt_f32_e64 s[0:1], s9, v1 +; VI-GISEL-NEXT: v_cndmask_b32_e32 v4, 0, v2, vcc +; VI-GISEL-NEXT: v_cmp_lt_f32_e32 vcc, s10, v1 +; VI-GISEL-NEXT: v_cndmask_b32_e64 v3, 0, 1, s[0:1] +; VI-GISEL-NEXT: v_cndmask_b32_e64 v1, 0, 1, vcc +; VI-GISEL-NEXT: v_lshlrev_b32_e32 v3, 5, v3 +; VI-GISEL-NEXT: v_lshlrev_b32_e32 v1, 5, v1 +; VI-GISEL-NEXT: v_ldexp_f32 v3, s9, v3 +; VI-GISEL-NEXT: v_ldexp_f32 v1, s10, v1 ; VI-GISEL-NEXT: v_sub_f32_e32 v0, v0, v4 
-; VI-GISEL-NEXT: v_cndmask_b32_e32 v4, 1.0, v2, vcc -; VI-GISEL-NEXT: v_mul_f32_e32 v4, s1, v4 -; VI-GISEL-NEXT: v_cmp_lt_f32_e64 s[0:1], s2, v1 -; VI-GISEL-NEXT: v_cndmask_b32_e64 v1, 1.0, v2, s[0:1] -; VI-GISEL-NEXT: v_mul_f32_e32 v1, s2, v1 -; VI-GISEL-NEXT: v_log_f32_e32 v4, v4 -; VI-GISEL-NEXT: v_log_f32_e32 v2, v1 -; VI-GISEL-NEXT: v_cndmask_b32_e32 v1, 0, v3, vcc -; VI-GISEL-NEXT: v_cndmask_b32_e64 v3, 0, v3, s[0:1] -; VI-GISEL-NEXT: v_sub_f32_e32 v1, v4, v1 -; VI-GISEL-NEXT: v_sub_f32_e32 v2, v2, v3 -; VI-GISEL-NEXT: v_mov_b32_e32 v3, s4 -; VI-GISEL-NEXT: v_mov_b32_e32 v4, s5 +; VI-GISEL-NEXT: v_log_f32_e32 v3, v3 +; VI-GISEL-NEXT: v_log_f32_e32 v4, v1 +; VI-GISEL-NEXT: v_cndmask_b32_e64 v1, 0, v2, s[0:1] +; VI-GISEL-NEXT: v_cndmask_b32_e32 v2, 0, v2, vcc +; VI-GISEL-NEXT: v_sub_f32_e32 v1, v3, v1 +; VI-GISEL-NEXT: v_sub_f32_e32 v2, v4, v2 +; VI-GISEL-NEXT: v_mov_b32_e32 v4, s3 +; VI-GISEL-NEXT: v_mov_b32_e32 v3, s2 ; VI-GISEL-NEXT: flat_store_dwordx3 v[3:4], v[0:2] ; VI-GISEL-NEXT: s_endpgm ; @@ -637,28 +650,30 @@ define amdgpu_kernel void @s_log2_v3f32(ptr addrspace(1) %out, <3 x float> %in) ; GFX900-GISEL-NEXT: s_load_dwordx4 s[0:3], s[4:5], 0x34 ; GFX900-GISEL-NEXT: s_load_dwordx2 s[6:7], s[4:5], 0x24 ; GFX900-GISEL-NEXT: v_mov_b32_e32 v1, 0x800000 -; GFX900-GISEL-NEXT: v_mov_b32_e32 v2, 0x4f800000 -; GFX900-GISEL-NEXT: v_mov_b32_e32 v3, 0x42000000 +; GFX900-GISEL-NEXT: v_mov_b32_e32 v2, 0x42000000 +; GFX900-GISEL-NEXT: v_mov_b32_e32 v3, 0 ; GFX900-GISEL-NEXT: s_waitcnt lgkmcnt(0) ; GFX900-GISEL-NEXT: v_cmp_lt_f32_e32 vcc, s0, v1 -; GFX900-GISEL-NEXT: v_cndmask_b32_e32 v0, 1.0, v2, vcc -; GFX900-GISEL-NEXT: v_mul_f32_e32 v0, s0, v0 +; GFX900-GISEL-NEXT: v_cndmask_b32_e64 v0, 0, 1, vcc +; GFX900-GISEL-NEXT: v_lshlrev_b32_e32 v0, 5, v0 +; GFX900-GISEL-NEXT: v_ldexp_f32 v0, s0, v0 ; GFX900-GISEL-NEXT: v_log_f32_e32 v0, v0 -; GFX900-GISEL-NEXT: v_cndmask_b32_e32 v4, 0, v3, vcc +; GFX900-GISEL-NEXT: v_cndmask_b32_e32 v4, 0, v2, vcc ; GFX900-GISEL-NEXT: 
v_cmp_lt_f32_e32 vcc, s1, v1 ; GFX900-GISEL-NEXT: v_sub_f32_e32 v0, v0, v4 -; GFX900-GISEL-NEXT: v_cndmask_b32_e32 v4, 1.0, v2, vcc -; GFX900-GISEL-NEXT: v_mul_f32_e32 v4, s1, v4 +; GFX900-GISEL-NEXT: v_cndmask_b32_e64 v4, 0, 1, vcc +; GFX900-GISEL-NEXT: v_lshlrev_b32_e32 v4, 5, v4 +; GFX900-GISEL-NEXT: v_ldexp_f32 v4, s1, v4 ; GFX900-GISEL-NEXT: v_cmp_lt_f32_e64 s[0:1], s2, v1 -; GFX900-GISEL-NEXT: v_cndmask_b32_e64 v1, 1.0, v2, s[0:1] -; GFX900-GISEL-NEXT: v_mul_f32_e32 v1, s2, v1 +; GFX900-GISEL-NEXT: v_cndmask_b32_e64 v1, 0, 1, s[0:1] +; GFX900-GISEL-NEXT: v_lshlrev_b32_e32 v1, 5, v1 +; GFX900-GISEL-NEXT: v_ldexp_f32 v1, s2, v1 ; GFX900-GISEL-NEXT: v_log_f32_e32 v4, v4 -; GFX900-GISEL-NEXT: v_log_f32_e32 v2, v1 -; GFX900-GISEL-NEXT: v_cndmask_b32_e32 v1, 0, v3, vcc -; GFX900-GISEL-NEXT: v_cndmask_b32_e64 v3, 0, v3, s[0:1] +; GFX900-GISEL-NEXT: v_log_f32_e32 v5, v1 +; GFX900-GISEL-NEXT: v_cndmask_b32_e32 v1, 0, v2, vcc +; GFX900-GISEL-NEXT: v_cndmask_b32_e64 v2, 0, v2, s[0:1] ; GFX900-GISEL-NEXT: v_sub_f32_e32 v1, v4, v1 -; GFX900-GISEL-NEXT: v_sub_f32_e32 v2, v2, v3 -; GFX900-GISEL-NEXT: v_mov_b32_e32 v3, 0 +; GFX900-GISEL-NEXT: v_sub_f32_e32 v2, v5, v2 ; GFX900-GISEL-NEXT: global_store_dwordx3 v3, v[0:2], s[6:7] ; GFX900-GISEL-NEXT: s_endpgm ; @@ -702,33 +717,40 @@ define amdgpu_kernel void @s_log2_v3f32(ptr addrspace(1) %out, <3 x float> %in) ; ; GFX1100-GISEL-LABEL: s_log2_v3f32: ; GFX1100-GISEL: ; %bb.0: +; GFX1100-GISEL-NEXT: s_clause 0x1 ; GFX1100-GISEL-NEXT: s_load_b128 s[0:3], s[4:5], 0x34 +; GFX1100-GISEL-NEXT: s_load_b64 s[4:5], s[4:5], 0x24 +; GFX1100-GISEL-NEXT: v_mov_b32_e32 v6, 0 ; GFX1100-GISEL-NEXT: s_waitcnt lgkmcnt(0) -; GFX1100-GISEL-NEXT: v_cmp_gt_f32_e64 s3, 0x800000, s0 ; GFX1100-GISEL-NEXT: v_cmp_gt_f32_e64 s6, 0x800000, s1 +; GFX1100-GISEL-NEXT: v_cmp_gt_f32_e64 s3, 0x800000, s0 ; GFX1100-GISEL-NEXT: v_cmp_gt_f32_e64 s7, 0x800000, s2 ; GFX1100-GISEL-NEXT: s_delay_alu instid0(VALU_DEP_3) | instskip(NEXT) | instid1(VALU_DEP_3) -; 
GFX1100-GISEL-NEXT: v_cndmask_b32_e64 v0, 1.0, 0x4f800000, s3 -; GFX1100-GISEL-NEXT: v_cndmask_b32_e64 v1, 1.0, 0x4f800000, s6 -; GFX1100-GISEL-NEXT: s_delay_alu instid0(VALU_DEP_3) | instskip(SKIP_2) | instid1(VALU_DEP_4) -; GFX1100-GISEL-NEXT: v_cndmask_b32_e64 v2, 1.0, 0x4f800000, s7 +; GFX1100-GISEL-NEXT: v_cndmask_b32_e64 v1, 0, 1, s6 +; GFX1100-GISEL-NEXT: v_cndmask_b32_e64 v0, 0, 1, s3 +; GFX1100-GISEL-NEXT: s_delay_alu instid0(VALU_DEP_3) +; GFX1100-GISEL-NEXT: v_cndmask_b32_e64 v2, 0, 1, s7 ; GFX1100-GISEL-NEXT: v_cndmask_b32_e64 v4, 0, 0x42000000, s6 ; GFX1100-GISEL-NEXT: v_cndmask_b32_e64 v3, 0, 0x42000000, s3 -; GFX1100-GISEL-NEXT: v_dual_mul_f32 v0, s0, v0 :: v_dual_mul_f32 v1, s1, v1 -; GFX1100-GISEL-NEXT: s_load_b64 s[0:1], s[4:5], 0x24 +; GFX1100-GISEL-NEXT: v_lshlrev_b32_e32 v1, 5, v1 +; GFX1100-GISEL-NEXT: v_lshlrev_b32_e32 v0, 5, v0 ; GFX1100-GISEL-NEXT: v_cndmask_b32_e64 v5, 0, 0x42000000, s7 -; GFX1100-GISEL-NEXT: s_delay_alu instid0(VALU_DEP_2) | instskip(NEXT) | instid1(VALU_DEP_2) -; GFX1100-GISEL-NEXT: v_log_f32_e32 v0, v0 +; GFX1100-GISEL-NEXT: s_delay_alu instid0(VALU_DEP_3) | instskip(NEXT) | instid1(VALU_DEP_3) +; GFX1100-GISEL-NEXT: v_ldexp_f32 v1, s1, v1 +; GFX1100-GISEL-NEXT: v_ldexp_f32 v0, s0, v0 +; GFX1100-GISEL-NEXT: s_delay_alu instid0(VALU_DEP_2) | instskip(SKIP_1) | instid1(VALU_DEP_2) ; GFX1100-GISEL-NEXT: v_log_f32_e32 v1, v1 -; GFX1100-GISEL-NEXT: v_mul_f32_e32 v2, s2, v2 +; GFX1100-GISEL-NEXT: v_lshlrev_b32_e32 v2, 5, v2 +; GFX1100-GISEL-NEXT: v_log_f32_e32 v0, v0 ; GFX1100-GISEL-NEXT: s_waitcnt_depctr 0xfff -; GFX1100-GISEL-NEXT: v_dual_sub_f32 v0, v0, v3 :: v_dual_mov_b32 v3, 0 ; GFX1100-GISEL-NEXT: v_sub_f32_e32 v1, v1, v4 +; GFX1100-GISEL-NEXT: v_ldexp_f32 v2, s2, v2 +; GFX1100-GISEL-NEXT: v_sub_f32_e32 v0, v0, v3 +; GFX1100-GISEL-NEXT: s_delay_alu instid0(VALU_DEP_2) ; GFX1100-GISEL-NEXT: v_log_f32_e32 v2, v2 ; GFX1100-GISEL-NEXT: s_waitcnt_depctr 0xfff ; GFX1100-GISEL-NEXT: v_sub_f32_e32 v2, v2, v5 -; 
GFX1100-GISEL-NEXT: s_waitcnt lgkmcnt(0) -; GFX1100-GISEL-NEXT: global_store_b96 v3, v[0:2], s[0:1] +; GFX1100-GISEL-NEXT: global_store_b96 v6, v[0:2], s[4:5] ; GFX1100-GISEL-NEXT: s_endpgm ; ; R600-LABEL: s_log2_v3f32: @@ -865,34 +887,37 @@ define amdgpu_kernel void @s_log2_v4f32(ptr addrspace(1) %out, <4 x float> %in) ; SI-GISEL-NEXT: s_load_dwordx4 s[8:11], s[4:5], 0xd ; SI-GISEL-NEXT: s_load_dwordx2 s[4:5], s[4:5], 0x9 ; SI-GISEL-NEXT: v_mov_b32_e32 v2, 0x800000 -; SI-GISEL-NEXT: v_mov_b32_e32 v3, 0x4f800000 -; SI-GISEL-NEXT: v_mov_b32_e32 v4, 0x42000000 +; SI-GISEL-NEXT: v_mov_b32_e32 v3, 0x42000000 +; SI-GISEL-NEXT: s_mov_b32 s6, -1 ; SI-GISEL-NEXT: s_waitcnt lgkmcnt(0) ; SI-GISEL-NEXT: v_cmp_lt_f32_e32 vcc, s8, v2 -; SI-GISEL-NEXT: v_cndmask_b32_e32 v0, 1.0, v3, vcc +; SI-GISEL-NEXT: v_cndmask_b32_e64 v0, 0, 1, vcc ; SI-GISEL-NEXT: v_cmp_lt_f32_e64 s[0:1], s9, v2 -; SI-GISEL-NEXT: v_mul_f32_e32 v0, s8, v0 -; SI-GISEL-NEXT: v_cndmask_b32_e64 v1, 1.0, v3, s[0:1] +; SI-GISEL-NEXT: v_lshlrev_b32_e32 v0, 5, v0 +; SI-GISEL-NEXT: v_cndmask_b32_e64 v1, 0, 1, s[0:1] +; SI-GISEL-NEXT: v_ldexp_f32_e32 v0, s8, v0 +; SI-GISEL-NEXT: v_lshlrev_b32_e32 v1, 5, v1 ; SI-GISEL-NEXT: v_log_f32_e32 v0, v0 -; SI-GISEL-NEXT: v_mul_f32_e32 v1, s9, v1 +; SI-GISEL-NEXT: v_ldexp_f32_e32 v1, s9, v1 ; SI-GISEL-NEXT: v_log_f32_e32 v1, v1 -; SI-GISEL-NEXT: v_cndmask_b32_e32 v5, 0, v4, vcc -; SI-GISEL-NEXT: v_sub_f32_e32 v0, v0, v5 -; SI-GISEL-NEXT: v_cndmask_b32_e64 v5, 0, v4, s[0:1] +; SI-GISEL-NEXT: v_cndmask_b32_e32 v4, 0, v3, vcc +; SI-GISEL-NEXT: v_sub_f32_e32 v0, v0, v4 +; SI-GISEL-NEXT: v_cndmask_b32_e64 v4, 0, v3, s[0:1] ; SI-GISEL-NEXT: v_cmp_lt_f32_e32 vcc, s10, v2 ; SI-GISEL-NEXT: v_cmp_lt_f32_e64 s[0:1], s11, v2 -; SI-GISEL-NEXT: v_sub_f32_e32 v1, v1, v5 -; SI-GISEL-NEXT: v_cndmask_b32_e32 v5, 1.0, v3, vcc -; SI-GISEL-NEXT: v_cndmask_b32_e64 v2, 1.0, v3, s[0:1] -; SI-GISEL-NEXT: v_mul_f32_e32 v5, s10, v5 -; SI-GISEL-NEXT: v_mul_f32_e32 v2, s11, v2 -; SI-GISEL-NEXT: 
v_log_f32_e32 v5, v5 -; SI-GISEL-NEXT: v_log_f32_e32 v3, v2 -; SI-GISEL-NEXT: v_cndmask_b32_e32 v2, 0, v4, vcc -; SI-GISEL-NEXT: v_cndmask_b32_e64 v4, 0, v4, s[0:1] -; SI-GISEL-NEXT: v_sub_f32_e32 v2, v5, v2 -; SI-GISEL-NEXT: v_sub_f32_e32 v3, v3, v4 -; SI-GISEL-NEXT: s_mov_b32 s6, -1 +; SI-GISEL-NEXT: v_sub_f32_e32 v1, v1, v4 +; SI-GISEL-NEXT: v_cndmask_b32_e64 v4, 0, 1, vcc +; SI-GISEL-NEXT: v_cndmask_b32_e64 v2, 0, 1, s[0:1] +; SI-GISEL-NEXT: v_lshlrev_b32_e32 v4, 5, v4 +; SI-GISEL-NEXT: v_lshlrev_b32_e32 v2, 5, v2 +; SI-GISEL-NEXT: v_ldexp_f32_e32 v4, s10, v4 +; SI-GISEL-NEXT: v_ldexp_f32_e32 v2, s11, v2 +; SI-GISEL-NEXT: v_log_f32_e32 v4, v4 +; SI-GISEL-NEXT: v_log_f32_e32 v5, v2 +; SI-GISEL-NEXT: v_cndmask_b32_e32 v2, 0, v3, vcc +; SI-GISEL-NEXT: v_cndmask_b32_e64 v3, 0, v3, s[0:1] +; SI-GISEL-NEXT: v_sub_f32_e32 v2, v4, v2 +; SI-GISEL-NEXT: v_sub_f32_e32 v3, v5, v3 ; SI-GISEL-NEXT: s_mov_b32 s7, 0xf000 ; SI-GISEL-NEXT: buffer_store_dwordx4 v[0:3], off, s[4:7], 0 ; SI-GISEL-NEXT: s_endpgm @@ -942,33 +967,36 @@ define amdgpu_kernel void @s_log2_v4f32(ptr addrspace(1) %out, <4 x float> %in) ; VI-GISEL-NEXT: s_load_dwordx4 s[8:11], s[4:5], 0x34 ; VI-GISEL-NEXT: s_load_dwordx2 s[2:3], s[4:5], 0x24 ; VI-GISEL-NEXT: v_mov_b32_e32 v2, 0x800000 -; VI-GISEL-NEXT: v_mov_b32_e32 v3, 0x4f800000 -; VI-GISEL-NEXT: v_mov_b32_e32 v4, 0x42000000 +; VI-GISEL-NEXT: v_mov_b32_e32 v3, 0x42000000 ; VI-GISEL-NEXT: s_waitcnt lgkmcnt(0) ; VI-GISEL-NEXT: v_cmp_lt_f32_e32 vcc, s8, v2 -; VI-GISEL-NEXT: v_cndmask_b32_e32 v0, 1.0, v3, vcc +; VI-GISEL-NEXT: v_cndmask_b32_e64 v0, 0, 1, vcc ; VI-GISEL-NEXT: v_cmp_lt_f32_e64 s[0:1], s9, v2 -; VI-GISEL-NEXT: v_mul_f32_e32 v0, s8, v0 -; VI-GISEL-NEXT: v_cndmask_b32_e64 v1, 1.0, v3, s[0:1] +; VI-GISEL-NEXT: v_lshlrev_b32_e32 v0, 5, v0 +; VI-GISEL-NEXT: v_cndmask_b32_e64 v1, 0, 1, s[0:1] +; VI-GISEL-NEXT: v_ldexp_f32 v0, s8, v0 +; VI-GISEL-NEXT: v_lshlrev_b32_e32 v1, 5, v1 ; VI-GISEL-NEXT: v_log_f32_e32 v0, v0 -; VI-GISEL-NEXT: v_mul_f32_e32 v1, 
s9, v1 +; VI-GISEL-NEXT: v_ldexp_f32 v1, s9, v1 ; VI-GISEL-NEXT: v_log_f32_e32 v1, v1 -; VI-GISEL-NEXT: v_cndmask_b32_e32 v5, 0, v4, vcc -; VI-GISEL-NEXT: v_sub_f32_e32 v0, v0, v5 -; VI-GISEL-NEXT: v_cndmask_b32_e64 v5, 0, v4, s[0:1] +; VI-GISEL-NEXT: v_cndmask_b32_e32 v4, 0, v3, vcc +; VI-GISEL-NEXT: v_sub_f32_e32 v0, v0, v4 +; VI-GISEL-NEXT: v_cndmask_b32_e64 v4, 0, v3, s[0:1] ; VI-GISEL-NEXT: v_cmp_lt_f32_e32 vcc, s10, v2 ; VI-GISEL-NEXT: v_cmp_lt_f32_e64 s[0:1], s11, v2 -; VI-GISEL-NEXT: v_sub_f32_e32 v1, v1, v5 -; VI-GISEL-NEXT: v_cndmask_b32_e32 v5, 1.0, v3, vcc -; VI-GISEL-NEXT: v_cndmask_b32_e64 v2, 1.0, v3, s[0:1] -; VI-GISEL-NEXT: v_mul_f32_e32 v5, s10, v5 -; VI-GISEL-NEXT: v_mul_f32_e32 v2, s11, v2 -; VI-GISEL-NEXT: v_log_f32_e32 v5, v5 -; VI-GISEL-NEXT: v_log_f32_e32 v3, v2 -; VI-GISEL-NEXT: v_cndmask_b32_e32 v2, 0, v4, vcc -; VI-GISEL-NEXT: v_cndmask_b32_e64 v4, 0, v4, s[0:1] -; VI-GISEL-NEXT: v_sub_f32_e32 v2, v5, v2 -; VI-GISEL-NEXT: v_sub_f32_e32 v3, v3, v4 +; VI-GISEL-NEXT: v_sub_f32_e32 v1, v1, v4 +; VI-GISEL-NEXT: v_cndmask_b32_e64 v4, 0, 1, vcc +; VI-GISEL-NEXT: v_cndmask_b32_e64 v2, 0, 1, s[0:1] +; VI-GISEL-NEXT: v_lshlrev_b32_e32 v4, 5, v4 +; VI-GISEL-NEXT: v_lshlrev_b32_e32 v2, 5, v2 +; VI-GISEL-NEXT: v_ldexp_f32 v4, s10, v4 +; VI-GISEL-NEXT: v_ldexp_f32 v2, s11, v2 +; VI-GISEL-NEXT: v_log_f32_e32 v4, v4 +; VI-GISEL-NEXT: v_log_f32_e32 v5, v2 +; VI-GISEL-NEXT: v_cndmask_b32_e32 v2, 0, v3, vcc +; VI-GISEL-NEXT: v_cndmask_b32_e64 v3, 0, v3, s[0:1] +; VI-GISEL-NEXT: v_sub_f32_e32 v2, v4, v2 +; VI-GISEL-NEXT: v_sub_f32_e32 v3, v5, v3 ; VI-GISEL-NEXT: v_mov_b32_e32 v5, s3 ; VI-GISEL-NEXT: v_mov_b32_e32 v4, s2 ; VI-GISEL-NEXT: flat_store_dwordx4 v[4:5], v[0:3] @@ -1018,34 +1046,37 @@ define amdgpu_kernel void @s_log2_v4f32(ptr addrspace(1) %out, <4 x float> %in) ; GFX900-GISEL-NEXT: s_load_dwordx4 s[8:11], s[4:5], 0x34 ; GFX900-GISEL-NEXT: s_load_dwordx2 s[2:3], s[4:5], 0x24 ; GFX900-GISEL-NEXT: v_mov_b32_e32 v2, 0x800000 -; GFX900-GISEL-NEXT: 
v_mov_b32_e32 v3, 0x4f800000 -; GFX900-GISEL-NEXT: v_mov_b32_e32 v4, 0x42000000 +; GFX900-GISEL-NEXT: v_mov_b32_e32 v3, 0x42000000 +; GFX900-GISEL-NEXT: v_mov_b32_e32 v4, 0 ; GFX900-GISEL-NEXT: s_waitcnt lgkmcnt(0) ; GFX900-GISEL-NEXT: v_cmp_lt_f32_e32 vcc, s8, v2 -; GFX900-GISEL-NEXT: v_cndmask_b32_e32 v0, 1.0, v3, vcc +; GFX900-GISEL-NEXT: v_cndmask_b32_e64 v0, 0, 1, vcc ; GFX900-GISEL-NEXT: v_cmp_lt_f32_e64 s[0:1], s9, v2 -; GFX900-GISEL-NEXT: v_mul_f32_e32 v0, s8, v0 -; GFX900-GISEL-NEXT: v_cndmask_b32_e64 v1, 1.0, v3, s[0:1] +; GFX900-GISEL-NEXT: v_lshlrev_b32_e32 v0, 5, v0 +; GFX900-GISEL-NEXT: v_cndmask_b32_e64 v1, 0, 1, s[0:1] +; GFX900-GISEL-NEXT: v_ldexp_f32 v0, s8, v0 +; GFX900-GISEL-NEXT: v_lshlrev_b32_e32 v1, 5, v1 ; GFX900-GISEL-NEXT: v_log_f32_e32 v0, v0 -; GFX900-GISEL-NEXT: v_mul_f32_e32 v1, s9, v1 +; GFX900-GISEL-NEXT: v_ldexp_f32 v1, s9, v1 ; GFX900-GISEL-NEXT: v_log_f32_e32 v1, v1 -; GFX900-GISEL-NEXT: v_cndmask_b32_e32 v5, 0, v4, vcc +; GFX900-GISEL-NEXT: v_cndmask_b32_e32 v5, 0, v3, vcc ; GFX900-GISEL-NEXT: v_sub_f32_e32 v0, v0, v5 -; GFX900-GISEL-NEXT: v_cndmask_b32_e64 v5, 0, v4, s[0:1] +; GFX900-GISEL-NEXT: v_cndmask_b32_e64 v5, 0, v3, s[0:1] ; GFX900-GISEL-NEXT: v_cmp_lt_f32_e32 vcc, s10, v2 ; GFX900-GISEL-NEXT: v_cmp_lt_f32_e64 s[0:1], s11, v2 ; GFX900-GISEL-NEXT: v_sub_f32_e32 v1, v1, v5 -; GFX900-GISEL-NEXT: v_cndmask_b32_e32 v5, 1.0, v3, vcc -; GFX900-GISEL-NEXT: v_cndmask_b32_e64 v2, 1.0, v3, s[0:1] -; GFX900-GISEL-NEXT: v_mul_f32_e32 v5, s10, v5 -; GFX900-GISEL-NEXT: v_mul_f32_e32 v2, s11, v2 +; GFX900-GISEL-NEXT: v_cndmask_b32_e64 v5, 0, 1, vcc +; GFX900-GISEL-NEXT: v_cndmask_b32_e64 v2, 0, 1, s[0:1] +; GFX900-GISEL-NEXT: v_lshlrev_b32_e32 v5, 5, v5 +; GFX900-GISEL-NEXT: v_lshlrev_b32_e32 v2, 5, v2 +; GFX900-GISEL-NEXT: v_ldexp_f32 v5, s10, v5 +; GFX900-GISEL-NEXT: v_ldexp_f32 v2, s11, v2 ; GFX900-GISEL-NEXT: v_log_f32_e32 v5, v5 -; GFX900-GISEL-NEXT: v_log_f32_e32 v3, v2 -; GFX900-GISEL-NEXT: v_cndmask_b32_e32 v2, 0, v4, vcc -; 
GFX900-GISEL-NEXT: v_cndmask_b32_e64 v4, 0, v4, s[0:1] +; GFX900-GISEL-NEXT: v_log_f32_e32 v6, v2 +; GFX900-GISEL-NEXT: v_cndmask_b32_e32 v2, 0, v3, vcc +; GFX900-GISEL-NEXT: v_cndmask_b32_e64 v3, 0, v3, s[0:1] ; GFX900-GISEL-NEXT: v_sub_f32_e32 v2, v5, v2 -; GFX900-GISEL-NEXT: v_sub_f32_e32 v3, v3, v4 -; GFX900-GISEL-NEXT: v_mov_b32_e32 v4, 0 +; GFX900-GISEL-NEXT: v_sub_f32_e32 v3, v6, v3 ; GFX900-GISEL-NEXT: global_store_dwordx4 v4, v[0:3], s[2:3] ; GFX900-GISEL-NEXT: s_endpgm ; @@ -1095,39 +1126,46 @@ define amdgpu_kernel void @s_log2_v4f32(ptr addrspace(1) %out, <4 x float> %in) ; ; GFX1100-GISEL-LABEL: s_log2_v4f32: ; GFX1100-GISEL: ; %bb.0: +; GFX1100-GISEL-NEXT: s_clause 0x1 ; GFX1100-GISEL-NEXT: s_load_b128 s[0:3], s[4:5], 0x34 +; GFX1100-GISEL-NEXT: s_load_b64 s[4:5], s[4:5], 0x24 +; GFX1100-GISEL-NEXT: v_mov_b32_e32 v8, 0 ; GFX1100-GISEL-NEXT: s_waitcnt lgkmcnt(0) -; GFX1100-GISEL-NEXT: v_cmp_gt_f32_e64 s6, 0x800000, s0 ; GFX1100-GISEL-NEXT: v_cmp_gt_f32_e64 s7, 0x800000, s1 -; GFX1100-GISEL-NEXT: v_cmp_gt_f32_e64 s8, 0x800000, s2 +; GFX1100-GISEL-NEXT: v_cmp_gt_f32_e64 s6, 0x800000, s0 ; GFX1100-GISEL-NEXT: v_cmp_gt_f32_e64 s9, 0x800000, s3 +; GFX1100-GISEL-NEXT: v_cmp_gt_f32_e64 s8, 0x800000, s2 ; GFX1100-GISEL-NEXT: s_delay_alu instid0(VALU_DEP_4) | instskip(NEXT) | instid1(VALU_DEP_4) -; GFX1100-GISEL-NEXT: v_cndmask_b32_e64 v0, 1.0, 0x4f800000, s6 -; GFX1100-GISEL-NEXT: v_cndmask_b32_e64 v1, 1.0, 0x4f800000, s7 -; GFX1100-GISEL-NEXT: s_delay_alu instid0(VALU_DEP_4) | instskip(NEXT) | instid1(VALU_DEP_4) -; GFX1100-GISEL-NEXT: v_cndmask_b32_e64 v2, 1.0, 0x4f800000, s8 -; GFX1100-GISEL-NEXT: v_cndmask_b32_e64 v3, 1.0, 0x4f800000, s9 +; GFX1100-GISEL-NEXT: v_cndmask_b32_e64 v1, 0, 1, s7 +; GFX1100-GISEL-NEXT: v_cndmask_b32_e64 v0, 0, 1, s6 +; GFX1100-GISEL-NEXT: v_cndmask_b32_e64 v5, 0, 0x42000000, s7 +; GFX1100-GISEL-NEXT: v_cndmask_b32_e64 v3, 0, 1, s9 ; GFX1100-GISEL-NEXT: v_cndmask_b32_e64 v4, 0, 0x42000000, s6 -; GFX1100-GISEL-NEXT: s_delay_alu 
instid0(VALU_DEP_4) | instskip(NEXT) | instid1(VALU_DEP_3) -; GFX1100-GISEL-NEXT: v_dual_mul_f32 v0, s0, v0 :: v_dual_mul_f32 v1, s1, v1 -; GFX1100-GISEL-NEXT: v_dual_mul_f32 v2, s2, v2 :: v_dual_mul_f32 v3, s3, v3 -; GFX1100-GISEL-NEXT: s_load_b64 s[0:1], s[4:5], 0x24 -; GFX1100-GISEL-NEXT: s_delay_alu instid0(VALU_DEP_2) | instskip(NEXT) | instid1(VALU_DEP_2) -; GFX1100-GISEL-NEXT: v_log_f32_e32 v0, v0 +; GFX1100-GISEL-NEXT: v_lshlrev_b32_e32 v1, 5, v1 +; GFX1100-GISEL-NEXT: v_cndmask_b32_e64 v2, 0, 1, s8 +; GFX1100-GISEL-NEXT: v_cndmask_b32_e64 v7, 0, 0x42000000, s9 +; GFX1100-GISEL-NEXT: v_cndmask_b32_e64 v6, 0, 0x42000000, s8 +; GFX1100-GISEL-NEXT: s_delay_alu instid0(VALU_DEP_4) | instskip(NEXT) | instid1(VALU_DEP_1) +; GFX1100-GISEL-NEXT: v_ldexp_f32 v1, s1, v1 ; GFX1100-GISEL-NEXT: v_log_f32_e32 v1, v1 -; GFX1100-GISEL-NEXT: s_delay_alu instid0(VALU_DEP_1) -; GFX1100-GISEL-NEXT: v_log_f32_e32 v2, v2 +; GFX1100-GISEL-NEXT: s_waitcnt_depctr 0xfff +; GFX1100-GISEL-NEXT: v_dual_sub_f32 v1, v1, v5 :: v_dual_lshlrev_b32 v0, 5, v0 +; GFX1100-GISEL-NEXT: s_delay_alu instid0(VALU_DEP_1) | instskip(NEXT) | instid1(VALU_DEP_1) +; GFX1100-GISEL-NEXT: v_ldexp_f32 v0, s0, v0 +; GFX1100-GISEL-NEXT: v_log_f32_e32 v0, v0 +; GFX1100-GISEL-NEXT: s_waitcnt_depctr 0xfff +; GFX1100-GISEL-NEXT: v_dual_sub_f32 v0, v0, v4 :: v_dual_lshlrev_b32 v3, 5, v3 +; GFX1100-GISEL-NEXT: s_delay_alu instid0(VALU_DEP_1) | instskip(NEXT) | instid1(VALU_DEP_1) +; GFX1100-GISEL-NEXT: v_ldexp_f32 v3, s3, v3 ; GFX1100-GISEL-NEXT: v_log_f32_e32 v3, v3 -; GFX1100-GISEL-NEXT: v_cndmask_b32_e64 v5, 0, 0x42000000, s7 -; GFX1100-GISEL-NEXT: v_cndmask_b32_e64 v6, 0, 0x42000000, s8 -; GFX1100-GISEL-NEXT: v_cndmask_b32_e64 v7, 0, 0x42000000, s9 -; GFX1100-GISEL-NEXT: s_delay_alu instid0(TRANS32_DEP_3) | instid1(VALU_DEP_3) -; GFX1100-GISEL-NEXT: v_dual_sub_f32 v0, v0, v4 :: v_dual_sub_f32 v1, v1, v5 ; GFX1100-GISEL-NEXT: s_waitcnt_depctr 0xfff -; GFX1100-GISEL-NEXT: v_dual_sub_f32 v2, v2, v6 :: 
v_dual_sub_f32 v3, v3, v7 -; GFX1100-GISEL-NEXT: v_mov_b32_e32 v4, 0 -; GFX1100-GISEL-NEXT: s_waitcnt lgkmcnt(0) -; GFX1100-GISEL-NEXT: global_store_b128 v4, v[0:3], s[0:1] +; GFX1100-GISEL-NEXT: v_dual_sub_f32 v3, v3, v7 :: v_dual_lshlrev_b32 v2, 5, v2 +; GFX1100-GISEL-NEXT: s_delay_alu instid0(VALU_DEP_1) | instskip(NEXT) | instid1(VALU_DEP_1) +; GFX1100-GISEL-NEXT: v_ldexp_f32 v2, s2, v2 +; GFX1100-GISEL-NEXT: v_log_f32_e32 v2, v2 +; GFX1100-GISEL-NEXT: s_waitcnt_depctr 0xfff +; GFX1100-GISEL-NEXT: v_sub_f32_e32 v2, v2, v6 +; GFX1100-GISEL-NEXT: global_store_b128 v8, v[0:3], s[4:5] ; GFX1100-GISEL-NEXT: s_endpgm ; ; R600-LABEL: s_log2_v4f32: @@ -1243,19 +1281,19 @@ define float @v_log2_f32(float %in) { ; SI-SDAG-NEXT: v_sub_f32_e32 v0, v0, v1 ; SI-SDAG-NEXT: s_setpc_b64 s[30:31] ; -; GFX689-GISEL-LABEL: v_log2_f32: -; GFX689-GISEL: ; %bb.0: -; GFX689-GISEL-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX689-GISEL-NEXT: v_mov_b32_e32 v1, 0x800000 -; GFX689-GISEL-NEXT: v_mov_b32_e32 v2, 0x4f800000 -; GFX689-GISEL-NEXT: v_cmp_lt_f32_e32 vcc, v0, v1 -; GFX689-GISEL-NEXT: v_cndmask_b32_e32 v1, 1.0, v2, vcc -; GFX689-GISEL-NEXT: v_mul_f32_e32 v0, v0, v1 -; GFX689-GISEL-NEXT: v_log_f32_e32 v0, v0 -; GFX689-GISEL-NEXT: v_mov_b32_e32 v1, 0x42000000 -; GFX689-GISEL-NEXT: v_cndmask_b32_e32 v1, 0, v1, vcc -; GFX689-GISEL-NEXT: v_sub_f32_e32 v0, v0, v1 -; GFX689-GISEL-NEXT: s_setpc_b64 s[30:31] +; SI-GISEL-LABEL: v_log2_f32: +; SI-GISEL: ; %bb.0: +; SI-GISEL-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; SI-GISEL-NEXT: v_mov_b32_e32 v1, 0x800000 +; SI-GISEL-NEXT: v_cmp_lt_f32_e32 vcc, v0, v1 +; SI-GISEL-NEXT: v_cndmask_b32_e64 v1, 0, 1, vcc +; SI-GISEL-NEXT: v_lshlrev_b32_e32 v1, 5, v1 +; SI-GISEL-NEXT: v_ldexp_f32_e32 v0, v0, v1 +; SI-GISEL-NEXT: v_log_f32_e32 v0, v0 +; SI-GISEL-NEXT: v_mov_b32_e32 v1, 0x42000000 +; SI-GISEL-NEXT: v_cndmask_b32_e32 v1, 0, v1, vcc +; SI-GISEL-NEXT: v_sub_f32_e32 v0, v0, v1 +; SI-GISEL-NEXT: s_setpc_b64 s[30:31] ; ; VI-SDAG-LABEL: 
v_log2_f32: ; VI-SDAG: ; %bb.0: @@ -1271,6 +1309,20 @@ define float @v_log2_f32(float %in) { ; VI-SDAG-NEXT: v_sub_f32_e32 v0, v0, v1 ; VI-SDAG-NEXT: s_setpc_b64 s[30:31] ; +; VI-GISEL-LABEL: v_log2_f32: +; VI-GISEL: ; %bb.0: +; VI-GISEL-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; VI-GISEL-NEXT: v_mov_b32_e32 v1, 0x800000 +; VI-GISEL-NEXT: v_cmp_lt_f32_e32 vcc, v0, v1 +; VI-GISEL-NEXT: v_cndmask_b32_e64 v1, 0, 1, vcc +; VI-GISEL-NEXT: v_lshlrev_b32_e32 v1, 5, v1 +; VI-GISEL-NEXT: v_ldexp_f32 v0, v0, v1 +; VI-GISEL-NEXT: v_log_f32_e32 v0, v0 +; VI-GISEL-NEXT: v_mov_b32_e32 v1, 0x42000000 +; VI-GISEL-NEXT: v_cndmask_b32_e32 v1, 0, v1, vcc +; VI-GISEL-NEXT: v_sub_f32_e32 v0, v0, v1 +; VI-GISEL-NEXT: s_setpc_b64 s[30:31] +; ; GFX900-SDAG-LABEL: v_log2_f32: ; GFX900-SDAG: ; %bb.0: ; GFX900-SDAG-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) @@ -1285,6 +1337,20 @@ define float @v_log2_f32(float %in) { ; GFX900-SDAG-NEXT: v_sub_f32_e32 v0, v0, v1 ; GFX900-SDAG-NEXT: s_setpc_b64 s[30:31] ; +; GFX900-GISEL-LABEL: v_log2_f32: +; GFX900-GISEL: ; %bb.0: +; GFX900-GISEL-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX900-GISEL-NEXT: v_mov_b32_e32 v1, 0x800000 +; GFX900-GISEL-NEXT: v_cmp_lt_f32_e32 vcc, v0, v1 +; GFX900-GISEL-NEXT: v_cndmask_b32_e64 v1, 0, 1, vcc +; GFX900-GISEL-NEXT: v_lshlrev_b32_e32 v1, 5, v1 +; GFX900-GISEL-NEXT: v_ldexp_f32 v0, v0, v1 +; GFX900-GISEL-NEXT: v_log_f32_e32 v0, v0 +; GFX900-GISEL-NEXT: v_mov_b32_e32 v1, 0x42000000 +; GFX900-GISEL-NEXT: v_cndmask_b32_e32 v1, 0, v1, vcc +; GFX900-GISEL-NEXT: v_sub_f32_e32 v0, v0, v1 +; GFX900-GISEL-NEXT: s_setpc_b64 s[30:31] +; ; GFX1100-SDAG-LABEL: v_log2_f32: ; GFX1100-SDAG: ; %bb.0: ; GFX1100-SDAG-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) @@ -1304,10 +1370,12 @@ define float @v_log2_f32(float %in) { ; GFX1100-GISEL: ; %bb.0: ; GFX1100-GISEL-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) ; GFX1100-GISEL-NEXT: v_cmp_gt_f32_e32 vcc_lo, 0x800000, v0 -; GFX1100-GISEL-NEXT: v_cndmask_b32_e64 v1, 1.0, 
0x4f800000, vcc_lo -; GFX1100-GISEL-NEXT: s_delay_alu instid0(VALU_DEP_1) | instskip(SKIP_1) | instid1(VALU_DEP_2) -; GFX1100-GISEL-NEXT: v_mul_f32_e32 v0, v0, v1 +; GFX1100-GISEL-NEXT: v_cndmask_b32_e64 v1, 0, 1, vcc_lo +; GFX1100-GISEL-NEXT: s_delay_alu instid0(VALU_DEP_1) | instskip(NEXT) | instid1(VALU_DEP_1) +; GFX1100-GISEL-NEXT: v_lshlrev_b32_e32 v1, 5, v1 +; GFX1100-GISEL-NEXT: v_ldexp_f32 v0, v0, v1 ; GFX1100-GISEL-NEXT: v_cndmask_b32_e64 v1, 0, 0x42000000, vcc_lo +; GFX1100-GISEL-NEXT: s_delay_alu instid0(VALU_DEP_2) ; GFX1100-GISEL-NEXT: v_log_f32_e32 v0, v0 ; GFX1100-GISEL-NEXT: s_waitcnt_depctr 0xfff ; GFX1100-GISEL-NEXT: v_sub_f32_e32 v0, v0, v1 @@ -1341,19 +1409,19 @@ define float @v_log2_fabs_f32(float %in) { ; SI-SDAG-NEXT: v_sub_f32_e32 v0, v0, v1 ; SI-SDAG-NEXT: s_setpc_b64 s[30:31] ; -; GFX689-GISEL-LABEL: v_log2_fabs_f32: -; GFX689-GISEL: ; %bb.0: -; GFX689-GISEL-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX689-GISEL-NEXT: v_mov_b32_e32 v1, 0x800000 -; GFX689-GISEL-NEXT: v_mov_b32_e32 v2, 0x4f800000 -; GFX689-GISEL-NEXT: v_cmp_lt_f32_e64 vcc, |v0|, v1 -; GFX689-GISEL-NEXT: v_cndmask_b32_e32 v1, 1.0, v2, vcc -; GFX689-GISEL-NEXT: v_mul_f32_e64 v0, |v0|, v1 -; GFX689-GISEL-NEXT: v_log_f32_e32 v0, v0 -; GFX689-GISEL-NEXT: v_mov_b32_e32 v1, 0x42000000 -; GFX689-GISEL-NEXT: v_cndmask_b32_e32 v1, 0, v1, vcc -; GFX689-GISEL-NEXT: v_sub_f32_e32 v0, v0, v1 -; GFX689-GISEL-NEXT: s_setpc_b64 s[30:31] +; SI-GISEL-LABEL: v_log2_fabs_f32: +; SI-GISEL: ; %bb.0: +; SI-GISEL-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; SI-GISEL-NEXT: v_mov_b32_e32 v1, 0x800000 +; SI-GISEL-NEXT: v_cmp_lt_f32_e64 vcc, |v0|, v1 +; SI-GISEL-NEXT: v_cndmask_b32_e64 v1, 0, 1, vcc +; SI-GISEL-NEXT: v_lshlrev_b32_e32 v1, 5, v1 +; SI-GISEL-NEXT: v_ldexp_f32_e64 v0, |v0|, v1 +; SI-GISEL-NEXT: v_log_f32_e32 v0, v0 +; SI-GISEL-NEXT: v_mov_b32_e32 v1, 0x42000000 +; SI-GISEL-NEXT: v_cndmask_b32_e32 v1, 0, v1, vcc +; SI-GISEL-NEXT: v_sub_f32_e32 v0, v0, v1 +; SI-GISEL-NEXT: 
s_setpc_b64 s[30:31] ; ; VI-SDAG-LABEL: v_log2_fabs_f32: ; VI-SDAG: ; %bb.0: @@ -1369,6 +1437,20 @@ define float @v_log2_fabs_f32(float %in) { ; VI-SDAG-NEXT: v_sub_f32_e32 v0, v0, v1 ; VI-SDAG-NEXT: s_setpc_b64 s[30:31] ; +; VI-GISEL-LABEL: v_log2_fabs_f32: +; VI-GISEL: ; %bb.0: +; VI-GISEL-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; VI-GISEL-NEXT: v_mov_b32_e32 v1, 0x800000 +; VI-GISEL-NEXT: v_cmp_lt_f32_e64 vcc, |v0|, v1 +; VI-GISEL-NEXT: v_cndmask_b32_e64 v1, 0, 1, vcc +; VI-GISEL-NEXT: v_lshlrev_b32_e32 v1, 5, v1 +; VI-GISEL-NEXT: v_ldexp_f32 v0, |v0|, v1 +; VI-GISEL-NEXT: v_log_f32_e32 v0, v0 +; VI-GISEL-NEXT: v_mov_b32_e32 v1, 0x42000000 +; VI-GISEL-NEXT: v_cndmask_b32_e32 v1, 0, v1, vcc +; VI-GISEL-NEXT: v_sub_f32_e32 v0, v0, v1 +; VI-GISEL-NEXT: s_setpc_b64 s[30:31] +; ; GFX900-SDAG-LABEL: v_log2_fabs_f32: ; GFX900-SDAG: ; %bb.0: ; GFX900-SDAG-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) @@ -1383,6 +1465,20 @@ define float @v_log2_fabs_f32(float %in) { ; GFX900-SDAG-NEXT: v_sub_f32_e32 v0, v0, v1 ; GFX900-SDAG-NEXT: s_setpc_b64 s[30:31] ; +; GFX900-GISEL-LABEL: v_log2_fabs_f32: +; GFX900-GISEL: ; %bb.0: +; GFX900-GISEL-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX900-GISEL-NEXT: v_mov_b32_e32 v1, 0x800000 +; GFX900-GISEL-NEXT: v_cmp_lt_f32_e64 vcc, |v0|, v1 +; GFX900-GISEL-NEXT: v_cndmask_b32_e64 v1, 0, 1, vcc +; GFX900-GISEL-NEXT: v_lshlrev_b32_e32 v1, 5, v1 +; GFX900-GISEL-NEXT: v_ldexp_f32 v0, |v0|, v1 +; GFX900-GISEL-NEXT: v_log_f32_e32 v0, v0 +; GFX900-GISEL-NEXT: v_mov_b32_e32 v1, 0x42000000 +; GFX900-GISEL-NEXT: v_cndmask_b32_e32 v1, 0, v1, vcc +; GFX900-GISEL-NEXT: v_sub_f32_e32 v0, v0, v1 +; GFX900-GISEL-NEXT: s_setpc_b64 s[30:31] +; ; GFX1100-SDAG-LABEL: v_log2_fabs_f32: ; GFX1100-SDAG: ; %bb.0: ; GFX1100-SDAG-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) @@ -1403,10 +1499,11 @@ define float @v_log2_fabs_f32(float %in) { ; GFX1100-GISEL-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) ; GFX1100-GISEL-NEXT: v_cmp_gt_f32_e64 s0, 0x800000, 
|v0| ; GFX1100-GISEL-NEXT: s_delay_alu instid0(VALU_DEP_1) | instskip(NEXT) | instid1(VALU_DEP_1) -; GFX1100-GISEL-NEXT: v_cndmask_b32_e64 v1, 1.0, 0x4f800000, s0 -; GFX1100-GISEL-NEXT: v_mul_f32_e64 v0, |v0|, v1 +; GFX1100-GISEL-NEXT: v_cndmask_b32_e64 v1, 0, 1, s0 +; GFX1100-GISEL-NEXT: v_lshlrev_b32_e32 v1, 5, v1 +; GFX1100-GISEL-NEXT: s_delay_alu instid0(VALU_DEP_1) | instskip(SKIP_1) | instid1(VALU_DEP_2) +; GFX1100-GISEL-NEXT: v_ldexp_f32 v0, |v0|, v1 ; GFX1100-GISEL-NEXT: v_cndmask_b32_e64 v1, 0, 0x42000000, s0 -; GFX1100-GISEL-NEXT: s_delay_alu instid0(VALU_DEP_2) ; GFX1100-GISEL-NEXT: v_log_f32_e32 v0, v0 ; GFX1100-GISEL-NEXT: s_waitcnt_depctr 0xfff ; GFX1100-GISEL-NEXT: v_sub_f32_e32 v0, v0, v1 @@ -1441,19 +1538,19 @@ define float @v_log2_fneg_fabs_f32(float %in) { ; SI-SDAG-NEXT: v_sub_f32_e32 v0, v0, v1 ; SI-SDAG-NEXT: s_setpc_b64 s[30:31] ; -; GFX689-GISEL-LABEL: v_log2_fneg_fabs_f32: -; GFX689-GISEL: ; %bb.0: -; GFX689-GISEL-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX689-GISEL-NEXT: v_mov_b32_e32 v1, 0x800000 -; GFX689-GISEL-NEXT: v_mov_b32_e32 v2, 0x4f800000 -; GFX689-GISEL-NEXT: v_cmp_lt_f32_e64 vcc, -|v0|, v1 -; GFX689-GISEL-NEXT: v_cndmask_b32_e32 v1, 1.0, v2, vcc -; GFX689-GISEL-NEXT: v_mul_f32_e64 v0, -|v0|, v1 -; GFX689-GISEL-NEXT: v_log_f32_e32 v0, v0 -; GFX689-GISEL-NEXT: v_mov_b32_e32 v1, 0x42000000 -; GFX689-GISEL-NEXT: v_cndmask_b32_e32 v1, 0, v1, vcc -; GFX689-GISEL-NEXT: v_sub_f32_e32 v0, v0, v1 -; GFX689-GISEL-NEXT: s_setpc_b64 s[30:31] +; SI-GISEL-LABEL: v_log2_fneg_fabs_f32: +; SI-GISEL: ; %bb.0: +; SI-GISEL-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; SI-GISEL-NEXT: v_mov_b32_e32 v1, 0x800000 +; SI-GISEL-NEXT: v_cmp_lt_f32_e64 vcc, -|v0|, v1 +; SI-GISEL-NEXT: v_cndmask_b32_e64 v1, 0, 1, vcc +; SI-GISEL-NEXT: v_lshlrev_b32_e32 v1, 5, v1 +; SI-GISEL-NEXT: v_ldexp_f32_e64 v0, -|v0|, v1 +; SI-GISEL-NEXT: v_log_f32_e32 v0, v0 +; SI-GISEL-NEXT: v_mov_b32_e32 v1, 0x42000000 +; SI-GISEL-NEXT: v_cndmask_b32_e32 v1, 0, v1, vcc +; 
SI-GISEL-NEXT: v_sub_f32_e32 v0, v0, v1 +; SI-GISEL-NEXT: s_setpc_b64 s[30:31] ; ; VI-SDAG-LABEL: v_log2_fneg_fabs_f32: ; VI-SDAG: ; %bb.0: @@ -1469,6 +1566,20 @@ define float @v_log2_fneg_fabs_f32(float %in) { ; VI-SDAG-NEXT: v_sub_f32_e32 v0, v0, v1 ; VI-SDAG-NEXT: s_setpc_b64 s[30:31] ; +; VI-GISEL-LABEL: v_log2_fneg_fabs_f32: +; VI-GISEL: ; %bb.0: +; VI-GISEL-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; VI-GISEL-NEXT: v_mov_b32_e32 v1, 0x800000 +; VI-GISEL-NEXT: v_cmp_lt_f32_e64 vcc, -|v0|, v1 +; VI-GISEL-NEXT: v_cndmask_b32_e64 v1, 0, 1, vcc +; VI-GISEL-NEXT: v_lshlrev_b32_e32 v1, 5, v1 +; VI-GISEL-NEXT: v_ldexp_f32 v0, -|v0|, v1 +; VI-GISEL-NEXT: v_log_f32_e32 v0, v0 +; VI-GISEL-NEXT: v_mov_b32_e32 v1, 0x42000000 +; VI-GISEL-NEXT: v_cndmask_b32_e32 v1, 0, v1, vcc +; VI-GISEL-NEXT: v_sub_f32_e32 v0, v0, v1 +; VI-GISEL-NEXT: s_setpc_b64 s[30:31] +; ; GFX900-SDAG-LABEL: v_log2_fneg_fabs_f32: ; GFX900-SDAG: ; %bb.0: ; GFX900-SDAG-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) @@ -1483,6 +1594,20 @@ define float @v_log2_fneg_fabs_f32(float %in) { ; GFX900-SDAG-NEXT: v_sub_f32_e32 v0, v0, v1 ; GFX900-SDAG-NEXT: s_setpc_b64 s[30:31] ; +; GFX900-GISEL-LABEL: v_log2_fneg_fabs_f32: +; GFX900-GISEL: ; %bb.0: +; GFX900-GISEL-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX900-GISEL-NEXT: v_mov_b32_e32 v1, 0x800000 +; GFX900-GISEL-NEXT: v_cmp_lt_f32_e64 vcc, -|v0|, v1 +; GFX900-GISEL-NEXT: v_cndmask_b32_e64 v1, 0, 1, vcc +; GFX900-GISEL-NEXT: v_lshlrev_b32_e32 v1, 5, v1 +; GFX900-GISEL-NEXT: v_ldexp_f32 v0, -|v0|, v1 +; GFX900-GISEL-NEXT: v_log_f32_e32 v0, v0 +; GFX900-GISEL-NEXT: v_mov_b32_e32 v1, 0x42000000 +; GFX900-GISEL-NEXT: v_cndmask_b32_e32 v1, 0, v1, vcc +; GFX900-GISEL-NEXT: v_sub_f32_e32 v0, v0, v1 +; GFX900-GISEL-NEXT: s_setpc_b64 s[30:31] +; ; GFX1100-SDAG-LABEL: v_log2_fneg_fabs_f32: ; GFX1100-SDAG: ; %bb.0: ; GFX1100-SDAG-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) @@ -1503,10 +1628,11 @@ define float @v_log2_fneg_fabs_f32(float %in) { ; 
GFX1100-GISEL-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) ; GFX1100-GISEL-NEXT: v_cmp_gt_f32_e64 s0, 0x800000, -|v0| ; GFX1100-GISEL-NEXT: s_delay_alu instid0(VALU_DEP_1) | instskip(NEXT) | instid1(VALU_DEP_1) -; GFX1100-GISEL-NEXT: v_cndmask_b32_e64 v1, 1.0, 0x4f800000, s0 -; GFX1100-GISEL-NEXT: v_mul_f32_e64 v0, -|v0|, v1 +; GFX1100-GISEL-NEXT: v_cndmask_b32_e64 v1, 0, 1, s0 +; GFX1100-GISEL-NEXT: v_lshlrev_b32_e32 v1, 5, v1 +; GFX1100-GISEL-NEXT: s_delay_alu instid0(VALU_DEP_1) | instskip(SKIP_1) | instid1(VALU_DEP_2) +; GFX1100-GISEL-NEXT: v_ldexp_f32 v0, -|v0|, v1 ; GFX1100-GISEL-NEXT: v_cndmask_b32_e64 v1, 0, 0x42000000, s0 -; GFX1100-GISEL-NEXT: s_delay_alu instid0(VALU_DEP_2) ; GFX1100-GISEL-NEXT: v_log_f32_e32 v0, v0 ; GFX1100-GISEL-NEXT: s_waitcnt_depctr 0xfff ; GFX1100-GISEL-NEXT: v_sub_f32_e32 v0, v0, v1 @@ -1542,19 +1668,19 @@ define float @v_log2_fneg_f32(float %in) { ; SI-SDAG-NEXT: v_sub_f32_e32 v0, v0, v1 ; SI-SDAG-NEXT: s_setpc_b64 s[30:31] ; -; GFX689-GISEL-LABEL: v_log2_fneg_f32: -; GFX689-GISEL: ; %bb.0: -; GFX689-GISEL-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX689-GISEL-NEXT: v_mov_b32_e32 v1, 0x800000 -; GFX689-GISEL-NEXT: v_mov_b32_e32 v2, 0x4f800000 -; GFX689-GISEL-NEXT: v_cmp_lt_f32_e64 vcc, -v0, v1 -; GFX689-GISEL-NEXT: v_cndmask_b32_e32 v1, 1.0, v2, vcc -; GFX689-GISEL-NEXT: v_mul_f32_e64 v0, -v0, v1 -; GFX689-GISEL-NEXT: v_log_f32_e32 v0, v0 -; GFX689-GISEL-NEXT: v_mov_b32_e32 v1, 0x42000000 -; GFX689-GISEL-NEXT: v_cndmask_b32_e32 v1, 0, v1, vcc -; GFX689-GISEL-NEXT: v_sub_f32_e32 v0, v0, v1 -; GFX689-GISEL-NEXT: s_setpc_b64 s[30:31] +; SI-GISEL-LABEL: v_log2_fneg_f32: +; SI-GISEL: ; %bb.0: +; SI-GISEL-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; SI-GISEL-NEXT: v_mov_b32_e32 v1, 0x800000 +; SI-GISEL-NEXT: v_cmp_lt_f32_e64 vcc, -v0, v1 +; SI-GISEL-NEXT: v_cndmask_b32_e64 v1, 0, 1, vcc +; SI-GISEL-NEXT: v_lshlrev_b32_e32 v1, 5, v1 +; SI-GISEL-NEXT: v_ldexp_f32_e64 v0, -v0, v1 +; SI-GISEL-NEXT: v_log_f32_e32 v0, v0 +; 
SI-GISEL-NEXT: v_mov_b32_e32 v1, 0x42000000 +; SI-GISEL-NEXT: v_cndmask_b32_e32 v1, 0, v1, vcc +; SI-GISEL-NEXT: v_sub_f32_e32 v0, v0, v1 +; SI-GISEL-NEXT: s_setpc_b64 s[30:31] ; ; VI-SDAG-LABEL: v_log2_fneg_f32: ; VI-SDAG: ; %bb.0: @@ -1570,6 +1696,20 @@ define float @v_log2_fneg_f32(float %in) { ; VI-SDAG-NEXT: v_sub_f32_e32 v0, v0, v1 ; VI-SDAG-NEXT: s_setpc_b64 s[30:31] ; +; VI-GISEL-LABEL: v_log2_fneg_f32: +; VI-GISEL: ; %bb.0: +; VI-GISEL-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; VI-GISEL-NEXT: v_mov_b32_e32 v1, 0x800000 +; VI-GISEL-NEXT: v_cmp_lt_f32_e64 vcc, -v0, v1 +; VI-GISEL-NEXT: v_cndmask_b32_e64 v1, 0, 1, vcc +; VI-GISEL-NEXT: v_lshlrev_b32_e32 v1, 5, v1 +; VI-GISEL-NEXT: v_ldexp_f32 v0, -v0, v1 +; VI-GISEL-NEXT: v_log_f32_e32 v0, v0 +; VI-GISEL-NEXT: v_mov_b32_e32 v1, 0x42000000 +; VI-GISEL-NEXT: v_cndmask_b32_e32 v1, 0, v1, vcc +; VI-GISEL-NEXT: v_sub_f32_e32 v0, v0, v1 +; VI-GISEL-NEXT: s_setpc_b64 s[30:31] +; ; GFX900-SDAG-LABEL: v_log2_fneg_f32: ; GFX900-SDAG: ; %bb.0: ; GFX900-SDAG-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) @@ -1584,6 +1724,20 @@ define float @v_log2_fneg_f32(float %in) { ; GFX900-SDAG-NEXT: v_sub_f32_e32 v0, v0, v1 ; GFX900-SDAG-NEXT: s_setpc_b64 s[30:31] ; +; GFX900-GISEL-LABEL: v_log2_fneg_f32: +; GFX900-GISEL: ; %bb.0: +; GFX900-GISEL-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX900-GISEL-NEXT: v_mov_b32_e32 v1, 0x800000 +; GFX900-GISEL-NEXT: v_cmp_lt_f32_e64 vcc, -v0, v1 +; GFX900-GISEL-NEXT: v_cndmask_b32_e64 v1, 0, 1, vcc +; GFX900-GISEL-NEXT: v_lshlrev_b32_e32 v1, 5, v1 +; GFX900-GISEL-NEXT: v_ldexp_f32 v0, -v0, v1 +; GFX900-GISEL-NEXT: v_log_f32_e32 v0, v0 +; GFX900-GISEL-NEXT: v_mov_b32_e32 v1, 0x42000000 +; GFX900-GISEL-NEXT: v_cndmask_b32_e32 v1, 0, v1, vcc +; GFX900-GISEL-NEXT: v_sub_f32_e32 v0, v0, v1 +; GFX900-GISEL-NEXT: s_setpc_b64 s[30:31] +; ; GFX1100-SDAG-LABEL: v_log2_fneg_f32: ; GFX1100-SDAG: ; %bb.0: ; GFX1100-SDAG-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) @@ -1604,10 +1758,11 @@ 
define float @v_log2_fneg_f32(float %in) { ; GFX1100-GISEL-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) ; GFX1100-GISEL-NEXT: v_cmp_gt_f32_e64 s0, 0x800000, -v0 ; GFX1100-GISEL-NEXT: s_delay_alu instid0(VALU_DEP_1) | instskip(NEXT) | instid1(VALU_DEP_1) -; GFX1100-GISEL-NEXT: v_cndmask_b32_e64 v1, 1.0, 0x4f800000, s0 -; GFX1100-GISEL-NEXT: v_mul_f32_e64 v0, -v0, v1 +; GFX1100-GISEL-NEXT: v_cndmask_b32_e64 v1, 0, 1, s0 +; GFX1100-GISEL-NEXT: v_lshlrev_b32_e32 v1, 5, v1 +; GFX1100-GISEL-NEXT: s_delay_alu instid0(VALU_DEP_1) | instskip(SKIP_1) | instid1(VALU_DEP_2) +; GFX1100-GISEL-NEXT: v_ldexp_f32 v0, -v0, v1 ; GFX1100-GISEL-NEXT: v_cndmask_b32_e64 v1, 0, 0x42000000, s0 -; GFX1100-GISEL-NEXT: s_delay_alu instid0(VALU_DEP_2) ; GFX1100-GISEL-NEXT: v_log_f32_e32 v0, v0 ; GFX1100-GISEL-NEXT: s_waitcnt_depctr 0xfff ; GFX1100-GISEL-NEXT: v_sub_f32_e32 v0, v0, v1 @@ -1642,19 +1797,19 @@ define float @v_log2_f32_fast(float %in) { ; SI-SDAG-NEXT: v_sub_f32_e32 v0, v0, v1 ; SI-SDAG-NEXT: s_setpc_b64 s[30:31] ; -; GFX689-GISEL-LABEL: v_log2_f32_fast: -; GFX689-GISEL: ; %bb.0: -; GFX689-GISEL-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX689-GISEL-NEXT: v_mov_b32_e32 v1, 0x800000 -; GFX689-GISEL-NEXT: v_mov_b32_e32 v2, 0x4f800000 -; GFX689-GISEL-NEXT: v_cmp_lt_f32_e32 vcc, v0, v1 -; GFX689-GISEL-NEXT: v_cndmask_b32_e32 v1, 1.0, v2, vcc -; GFX689-GISEL-NEXT: v_mul_f32_e32 v0, v0, v1 -; GFX689-GISEL-NEXT: v_log_f32_e32 v0, v0 -; GFX689-GISEL-NEXT: v_mov_b32_e32 v1, 0x42000000 -; GFX689-GISEL-NEXT: v_cndmask_b32_e32 v1, 0, v1, vcc -; GFX689-GISEL-NEXT: v_sub_f32_e32 v0, v0, v1 -; GFX689-GISEL-NEXT: s_setpc_b64 s[30:31] +; SI-GISEL-LABEL: v_log2_f32_fast: +; SI-GISEL: ; %bb.0: +; SI-GISEL-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; SI-GISEL-NEXT: v_mov_b32_e32 v1, 0x800000 +; SI-GISEL-NEXT: v_cmp_lt_f32_e32 vcc, v0, v1 +; SI-GISEL-NEXT: v_cndmask_b32_e64 v1, 0, 1, vcc +; SI-GISEL-NEXT: v_lshlrev_b32_e32 v1, 5, v1 +; SI-GISEL-NEXT: v_ldexp_f32_e32 v0, v0, v1 +; 
SI-GISEL-NEXT: v_log_f32_e32 v0, v0 +; SI-GISEL-NEXT: v_mov_b32_e32 v1, 0x42000000 +; SI-GISEL-NEXT: v_cndmask_b32_e32 v1, 0, v1, vcc +; SI-GISEL-NEXT: v_sub_f32_e32 v0, v0, v1 +; SI-GISEL-NEXT: s_setpc_b64 s[30:31] ; ; VI-SDAG-LABEL: v_log2_f32_fast: ; VI-SDAG: ; %bb.0: @@ -1670,6 +1825,20 @@ define float @v_log2_f32_fast(float %in) { ; VI-SDAG-NEXT: v_sub_f32_e32 v0, v0, v1 ; VI-SDAG-NEXT: s_setpc_b64 s[30:31] ; +; VI-GISEL-LABEL: v_log2_f32_fast: +; VI-GISEL: ; %bb.0: +; VI-GISEL-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; VI-GISEL-NEXT: v_mov_b32_e32 v1, 0x800000 +; VI-GISEL-NEXT: v_cmp_lt_f32_e32 vcc, v0, v1 +; VI-GISEL-NEXT: v_cndmask_b32_e64 v1, 0, 1, vcc +; VI-GISEL-NEXT: v_lshlrev_b32_e32 v1, 5, v1 +; VI-GISEL-NEXT: v_ldexp_f32 v0, v0, v1 +; VI-GISEL-NEXT: v_log_f32_e32 v0, v0 +; VI-GISEL-NEXT: v_mov_b32_e32 v1, 0x42000000 +; VI-GISEL-NEXT: v_cndmask_b32_e32 v1, 0, v1, vcc +; VI-GISEL-NEXT: v_sub_f32_e32 v0, v0, v1 +; VI-GISEL-NEXT: s_setpc_b64 s[30:31] +; ; GFX900-SDAG-LABEL: v_log2_f32_fast: ; GFX900-SDAG: ; %bb.0: ; GFX900-SDAG-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) @@ -1684,6 +1853,20 @@ define float @v_log2_f32_fast(float %in) { ; GFX900-SDAG-NEXT: v_sub_f32_e32 v0, v0, v1 ; GFX900-SDAG-NEXT: s_setpc_b64 s[30:31] ; +; GFX900-GISEL-LABEL: v_log2_f32_fast: +; GFX900-GISEL: ; %bb.0: +; GFX900-GISEL-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX900-GISEL-NEXT: v_mov_b32_e32 v1, 0x800000 +; GFX900-GISEL-NEXT: v_cmp_lt_f32_e32 vcc, v0, v1 +; GFX900-GISEL-NEXT: v_cndmask_b32_e64 v1, 0, 1, vcc +; GFX900-GISEL-NEXT: v_lshlrev_b32_e32 v1, 5, v1 +; GFX900-GISEL-NEXT: v_ldexp_f32 v0, v0, v1 +; GFX900-GISEL-NEXT: v_log_f32_e32 v0, v0 +; GFX900-GISEL-NEXT: v_mov_b32_e32 v1, 0x42000000 +; GFX900-GISEL-NEXT: v_cndmask_b32_e32 v1, 0, v1, vcc +; GFX900-GISEL-NEXT: v_sub_f32_e32 v0, v0, v1 +; GFX900-GISEL-NEXT: s_setpc_b64 s[30:31] +; ; GFX1100-SDAG-LABEL: v_log2_f32_fast: ; GFX1100-SDAG: ; %bb.0: ; GFX1100-SDAG-NEXT: s_waitcnt vmcnt(0) expcnt(0) 
lgkmcnt(0) @@ -1703,10 +1886,12 @@ define float @v_log2_f32_fast(float %in) { ; GFX1100-GISEL: ; %bb.0: ; GFX1100-GISEL-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) ; GFX1100-GISEL-NEXT: v_cmp_gt_f32_e32 vcc_lo, 0x800000, v0 -; GFX1100-GISEL-NEXT: v_cndmask_b32_e64 v1, 1.0, 0x4f800000, vcc_lo -; GFX1100-GISEL-NEXT: s_delay_alu instid0(VALU_DEP_1) | instskip(SKIP_1) | instid1(VALU_DEP_2) -; GFX1100-GISEL-NEXT: v_mul_f32_e32 v0, v0, v1 +; GFX1100-GISEL-NEXT: v_cndmask_b32_e64 v1, 0, 1, vcc_lo +; GFX1100-GISEL-NEXT: s_delay_alu instid0(VALU_DEP_1) | instskip(NEXT) | instid1(VALU_DEP_1) +; GFX1100-GISEL-NEXT: v_lshlrev_b32_e32 v1, 5, v1 +; GFX1100-GISEL-NEXT: v_ldexp_f32 v0, v0, v1 ; GFX1100-GISEL-NEXT: v_cndmask_b32_e64 v1, 0, 0x42000000, vcc_lo +; GFX1100-GISEL-NEXT: s_delay_alu instid0(VALU_DEP_2) ; GFX1100-GISEL-NEXT: v_log_f32_e32 v0, v0 ; GFX1100-GISEL-NEXT: s_waitcnt_depctr 0xfff ; GFX1100-GISEL-NEXT: v_sub_f32_e32 v0, v0, v1 @@ -1740,19 +1925,19 @@ define float @v_log2_f32_unsafe_math_attr(float %in) "unsafe-fp-math"="true" { ; SI-SDAG-NEXT: v_sub_f32_e32 v0, v0, v1 ; SI-SDAG-NEXT: s_setpc_b64 s[30:31] ; -; GFX689-GISEL-LABEL: v_log2_f32_unsafe_math_attr: -; GFX689-GISEL: ; %bb.0: -; GFX689-GISEL-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX689-GISEL-NEXT: v_mov_b32_e32 v1, 0x800000 -; GFX689-GISEL-NEXT: v_mov_b32_e32 v2, 0x4f800000 -; GFX689-GISEL-NEXT: v_cmp_lt_f32_e32 vcc, v0, v1 -; GFX689-GISEL-NEXT: v_cndmask_b32_e32 v1, 1.0, v2, vcc -; GFX689-GISEL-NEXT: v_mul_f32_e32 v0, v0, v1 -; GFX689-GISEL-NEXT: v_log_f32_e32 v0, v0 -; GFX689-GISEL-NEXT: v_mov_b32_e32 v1, 0x42000000 -; GFX689-GISEL-NEXT: v_cndmask_b32_e32 v1, 0, v1, vcc -; GFX689-GISEL-NEXT: v_sub_f32_e32 v0, v0, v1 -; GFX689-GISEL-NEXT: s_setpc_b64 s[30:31] +; SI-GISEL-LABEL: v_log2_f32_unsafe_math_attr: +; SI-GISEL: ; %bb.0: +; SI-GISEL-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; SI-GISEL-NEXT: v_mov_b32_e32 v1, 0x800000 +; SI-GISEL-NEXT: v_cmp_lt_f32_e32 vcc, v0, v1 +; SI-GISEL-NEXT: 
v_cndmask_b32_e64 v1, 0, 1, vcc +; SI-GISEL-NEXT: v_lshlrev_b32_e32 v1, 5, v1 +; SI-GISEL-NEXT: v_ldexp_f32_e32 v0, v0, v1 +; SI-GISEL-NEXT: v_log_f32_e32 v0, v0 +; SI-GISEL-NEXT: v_mov_b32_e32 v1, 0x42000000 +; SI-GISEL-NEXT: v_cndmask_b32_e32 v1, 0, v1, vcc +; SI-GISEL-NEXT: v_sub_f32_e32 v0, v0, v1 +; SI-GISEL-NEXT: s_setpc_b64 s[30:31] ; ; VI-SDAG-LABEL: v_log2_f32_unsafe_math_attr: ; VI-SDAG: ; %bb.0: @@ -1768,6 +1953,20 @@ define float @v_log2_f32_unsafe_math_attr(float %in) "unsafe-fp-math"="true" { ; VI-SDAG-NEXT: v_sub_f32_e32 v0, v0, v1 ; VI-SDAG-NEXT: s_setpc_b64 s[30:31] ; +; VI-GISEL-LABEL: v_log2_f32_unsafe_math_attr: +; VI-GISEL: ; %bb.0: +; VI-GISEL-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; VI-GISEL-NEXT: v_mov_b32_e32 v1, 0x800000 +; VI-GISEL-NEXT: v_cmp_lt_f32_e32 vcc, v0, v1 +; VI-GISEL-NEXT: v_cndmask_b32_e64 v1, 0, 1, vcc +; VI-GISEL-NEXT: v_lshlrev_b32_e32 v1, 5, v1 +; VI-GISEL-NEXT: v_ldexp_f32 v0, v0, v1 +; VI-GISEL-NEXT: v_log_f32_e32 v0, v0 +; VI-GISEL-NEXT: v_mov_b32_e32 v1, 0x42000000 +; VI-GISEL-NEXT: v_cndmask_b32_e32 v1, 0, v1, vcc +; VI-GISEL-NEXT: v_sub_f32_e32 v0, v0, v1 +; VI-GISEL-NEXT: s_setpc_b64 s[30:31] +; ; GFX900-SDAG-LABEL: v_log2_f32_unsafe_math_attr: ; GFX900-SDAG: ; %bb.0: ; GFX900-SDAG-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) @@ -1782,6 +1981,20 @@ define float @v_log2_f32_unsafe_math_attr(float %in) "unsafe-fp-math"="true" { ; GFX900-SDAG-NEXT: v_sub_f32_e32 v0, v0, v1 ; GFX900-SDAG-NEXT: s_setpc_b64 s[30:31] ; +; GFX900-GISEL-LABEL: v_log2_f32_unsafe_math_attr: +; GFX900-GISEL: ; %bb.0: +; GFX900-GISEL-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX900-GISEL-NEXT: v_mov_b32_e32 v1, 0x800000 +; GFX900-GISEL-NEXT: v_cmp_lt_f32_e32 vcc, v0, v1 +; GFX900-GISEL-NEXT: v_cndmask_b32_e64 v1, 0, 1, vcc +; GFX900-GISEL-NEXT: v_lshlrev_b32_e32 v1, 5, v1 +; GFX900-GISEL-NEXT: v_ldexp_f32 v0, v0, v1 +; GFX900-GISEL-NEXT: v_log_f32_e32 v0, v0 +; GFX900-GISEL-NEXT: v_mov_b32_e32 v1, 0x42000000 +; GFX900-GISEL-NEXT: 
v_cndmask_b32_e32 v1, 0, v1, vcc +; GFX900-GISEL-NEXT: v_sub_f32_e32 v0, v0, v1 +; GFX900-GISEL-NEXT: s_setpc_b64 s[30:31] +; ; GFX1100-SDAG-LABEL: v_log2_f32_unsafe_math_attr: ; GFX1100-SDAG: ; %bb.0: ; GFX1100-SDAG-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) @@ -1801,10 +2014,12 @@ define float @v_log2_f32_unsafe_math_attr(float %in) "unsafe-fp-math"="true" { ; GFX1100-GISEL: ; %bb.0: ; GFX1100-GISEL-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) ; GFX1100-GISEL-NEXT: v_cmp_gt_f32_e32 vcc_lo, 0x800000, v0 -; GFX1100-GISEL-NEXT: v_cndmask_b32_e64 v1, 1.0, 0x4f800000, vcc_lo -; GFX1100-GISEL-NEXT: s_delay_alu instid0(VALU_DEP_1) | instskip(SKIP_1) | instid1(VALU_DEP_2) -; GFX1100-GISEL-NEXT: v_mul_f32_e32 v0, v0, v1 +; GFX1100-GISEL-NEXT: v_cndmask_b32_e64 v1, 0, 1, vcc_lo +; GFX1100-GISEL-NEXT: s_delay_alu instid0(VALU_DEP_1) | instskip(NEXT) | instid1(VALU_DEP_1) +; GFX1100-GISEL-NEXT: v_lshlrev_b32_e32 v1, 5, v1 +; GFX1100-GISEL-NEXT: v_ldexp_f32 v0, v0, v1 ; GFX1100-GISEL-NEXT: v_cndmask_b32_e64 v1, 0, 0x42000000, vcc_lo +; GFX1100-GISEL-NEXT: s_delay_alu instid0(VALU_DEP_2) ; GFX1100-GISEL-NEXT: v_log_f32_e32 v0, v0 ; GFX1100-GISEL-NEXT: s_waitcnt_depctr 0xfff ; GFX1100-GISEL-NEXT: v_sub_f32_e32 v0, v0, v1 @@ -1838,19 +2053,19 @@ define float @v_log2_f32_approx_fn_attr(float %in) "approx-func-fp-math"="true" ; SI-SDAG-NEXT: v_sub_f32_e32 v0, v0, v1 ; SI-SDAG-NEXT: s_setpc_b64 s[30:31] ; -; GFX689-GISEL-LABEL: v_log2_f32_approx_fn_attr: -; GFX689-GISEL: ; %bb.0: -; GFX689-GISEL-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX689-GISEL-NEXT: v_mov_b32_e32 v1, 0x800000 -; GFX689-GISEL-NEXT: v_mov_b32_e32 v2, 0x4f800000 -; GFX689-GISEL-NEXT: v_cmp_lt_f32_e32 vcc, v0, v1 -; GFX689-GISEL-NEXT: v_cndmask_b32_e32 v1, 1.0, v2, vcc -; GFX689-GISEL-NEXT: v_mul_f32_e32 v0, v0, v1 -; GFX689-GISEL-NEXT: v_log_f32_e32 v0, v0 -; GFX689-GISEL-NEXT: v_mov_b32_e32 v1, 0x42000000 -; GFX689-GISEL-NEXT: v_cndmask_b32_e32 v1, 0, v1, vcc -; GFX689-GISEL-NEXT: v_sub_f32_e32 v0, 
v0, v1 -; GFX689-GISEL-NEXT: s_setpc_b64 s[30:31] +; SI-GISEL-LABEL: v_log2_f32_approx_fn_attr: +; SI-GISEL: ; %bb.0: +; SI-GISEL-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; SI-GISEL-NEXT: v_mov_b32_e32 v1, 0x800000 +; SI-GISEL-NEXT: v_cmp_lt_f32_e32 vcc, v0, v1 +; SI-GISEL-NEXT: v_cndmask_b32_e64 v1, 0, 1, vcc +; SI-GISEL-NEXT: v_lshlrev_b32_e32 v1, 5, v1 +; SI-GISEL-NEXT: v_ldexp_f32_e32 v0, v0, v1 +; SI-GISEL-NEXT: v_log_f32_e32 v0, v0 +; SI-GISEL-NEXT: v_mov_b32_e32 v1, 0x42000000 +; SI-GISEL-NEXT: v_cndmask_b32_e32 v1, 0, v1, vcc +; SI-GISEL-NEXT: v_sub_f32_e32 v0, v0, v1 +; SI-GISEL-NEXT: s_setpc_b64 s[30:31] ; ; VI-SDAG-LABEL: v_log2_f32_approx_fn_attr: ; VI-SDAG: ; %bb.0: @@ -1866,6 +2081,20 @@ define float @v_log2_f32_approx_fn_attr(float %in) "approx-func-fp-math"="true" ; VI-SDAG-NEXT: v_sub_f32_e32 v0, v0, v1 ; VI-SDAG-NEXT: s_setpc_b64 s[30:31] ; +; VI-GISEL-LABEL: v_log2_f32_approx_fn_attr: +; VI-GISEL: ; %bb.0: +; VI-GISEL-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; VI-GISEL-NEXT: v_mov_b32_e32 v1, 0x800000 +; VI-GISEL-NEXT: v_cmp_lt_f32_e32 vcc, v0, v1 +; VI-GISEL-NEXT: v_cndmask_b32_e64 v1, 0, 1, vcc +; VI-GISEL-NEXT: v_lshlrev_b32_e32 v1, 5, v1 +; VI-GISEL-NEXT: v_ldexp_f32 v0, v0, v1 +; VI-GISEL-NEXT: v_log_f32_e32 v0, v0 +; VI-GISEL-NEXT: v_mov_b32_e32 v1, 0x42000000 +; VI-GISEL-NEXT: v_cndmask_b32_e32 v1, 0, v1, vcc +; VI-GISEL-NEXT: v_sub_f32_e32 v0, v0, v1 +; VI-GISEL-NEXT: s_setpc_b64 s[30:31] +; ; GFX900-SDAG-LABEL: v_log2_f32_approx_fn_attr: ; GFX900-SDAG: ; %bb.0: ; GFX900-SDAG-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) @@ -1880,6 +2109,20 @@ define float @v_log2_f32_approx_fn_attr(float %in) "approx-func-fp-math"="true" ; GFX900-SDAG-NEXT: v_sub_f32_e32 v0, v0, v1 ; GFX900-SDAG-NEXT: s_setpc_b64 s[30:31] ; +; GFX900-GISEL-LABEL: v_log2_f32_approx_fn_attr: +; GFX900-GISEL: ; %bb.0: +; GFX900-GISEL-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX900-GISEL-NEXT: v_mov_b32_e32 v1, 0x800000 +; GFX900-GISEL-NEXT: 
v_cmp_lt_f32_e32 vcc, v0, v1 +; GFX900-GISEL-NEXT: v_cndmask_b32_e64 v1, 0, 1, vcc +; GFX900-GISEL-NEXT: v_lshlrev_b32_e32 v1, 5, v1 +; GFX900-GISEL-NEXT: v_ldexp_f32 v0, v0, v1 +; GFX900-GISEL-NEXT: v_log_f32_e32 v0, v0 +; GFX900-GISEL-NEXT: v_mov_b32_e32 v1, 0x42000000 +; GFX900-GISEL-NEXT: v_cndmask_b32_e32 v1, 0, v1, vcc +; GFX900-GISEL-NEXT: v_sub_f32_e32 v0, v0, v1 +; GFX900-GISEL-NEXT: s_setpc_b64 s[30:31] +; ; GFX1100-SDAG-LABEL: v_log2_f32_approx_fn_attr: ; GFX1100-SDAG: ; %bb.0: ; GFX1100-SDAG-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) @@ -1899,10 +2142,12 @@ define float @v_log2_f32_approx_fn_attr(float %in) "approx-func-fp-math"="true" ; GFX1100-GISEL: ; %bb.0: ; GFX1100-GISEL-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) ; GFX1100-GISEL-NEXT: v_cmp_gt_f32_e32 vcc_lo, 0x800000, v0 -; GFX1100-GISEL-NEXT: v_cndmask_b32_e64 v1, 1.0, 0x4f800000, vcc_lo -; GFX1100-GISEL-NEXT: s_delay_alu instid0(VALU_DEP_1) | instskip(SKIP_1) | instid1(VALU_DEP_2) -; GFX1100-GISEL-NEXT: v_mul_f32_e32 v0, v0, v1 +; GFX1100-GISEL-NEXT: v_cndmask_b32_e64 v1, 0, 1, vcc_lo +; GFX1100-GISEL-NEXT: s_delay_alu instid0(VALU_DEP_1) | instskip(NEXT) | instid1(VALU_DEP_1) +; GFX1100-GISEL-NEXT: v_lshlrev_b32_e32 v1, 5, v1 +; GFX1100-GISEL-NEXT: v_ldexp_f32 v0, v0, v1 ; GFX1100-GISEL-NEXT: v_cndmask_b32_e64 v1, 0, 0x42000000, vcc_lo +; GFX1100-GISEL-NEXT: s_delay_alu instid0(VALU_DEP_2) ; GFX1100-GISEL-NEXT: v_log_f32_e32 v0, v0 ; GFX1100-GISEL-NEXT: s_waitcnt_depctr 0xfff ; GFX1100-GISEL-NEXT: v_sub_f32_e32 v0, v0, v1 @@ -1936,19 +2181,19 @@ define float @v_log2_f32_ninf(float %in) { ; SI-SDAG-NEXT: v_sub_f32_e32 v0, v0, v1 ; SI-SDAG-NEXT: s_setpc_b64 s[30:31] ; -; GFX689-GISEL-LABEL: v_log2_f32_ninf: -; GFX689-GISEL: ; %bb.0: -; GFX689-GISEL-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX689-GISEL-NEXT: v_mov_b32_e32 v1, 0x800000 -; GFX689-GISEL-NEXT: v_mov_b32_e32 v2, 0x4f800000 -; GFX689-GISEL-NEXT: v_cmp_lt_f32_e32 vcc, v0, v1 -; GFX689-GISEL-NEXT: v_cndmask_b32_e32 v1, 1.0, 
v2, vcc -; GFX689-GISEL-NEXT: v_mul_f32_e32 v0, v0, v1 -; GFX689-GISEL-NEXT: v_log_f32_e32 v0, v0 -; GFX689-GISEL-NEXT: v_mov_b32_e32 v1, 0x42000000 -; GFX689-GISEL-NEXT: v_cndmask_b32_e32 v1, 0, v1, vcc -; GFX689-GISEL-NEXT: v_sub_f32_e32 v0, v0, v1 -; GFX689-GISEL-NEXT: s_setpc_b64 s[30:31] +; SI-GISEL-LABEL: v_log2_f32_ninf: +; SI-GISEL: ; %bb.0: +; SI-GISEL-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; SI-GISEL-NEXT: v_mov_b32_e32 v1, 0x800000 +; SI-GISEL-NEXT: v_cmp_lt_f32_e32 vcc, v0, v1 +; SI-GISEL-NEXT: v_cndmask_b32_e64 v1, 0, 1, vcc +; SI-GISEL-NEXT: v_lshlrev_b32_e32 v1, 5, v1 +; SI-GISEL-NEXT: v_ldexp_f32_e32 v0, v0, v1 +; SI-GISEL-NEXT: v_log_f32_e32 v0, v0 +; SI-GISEL-NEXT: v_mov_b32_e32 v1, 0x42000000 +; SI-GISEL-NEXT: v_cndmask_b32_e32 v1, 0, v1, vcc +; SI-GISEL-NEXT: v_sub_f32_e32 v0, v0, v1 +; SI-GISEL-NEXT: s_setpc_b64 s[30:31] ; ; VI-SDAG-LABEL: v_log2_f32_ninf: ; VI-SDAG: ; %bb.0: @@ -1964,6 +2209,20 @@ define float @v_log2_f32_ninf(float %in) { ; VI-SDAG-NEXT: v_sub_f32_e32 v0, v0, v1 ; VI-SDAG-NEXT: s_setpc_b64 s[30:31] ; +; VI-GISEL-LABEL: v_log2_f32_ninf: +; VI-GISEL: ; %bb.0: +; VI-GISEL-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; VI-GISEL-NEXT: v_mov_b32_e32 v1, 0x800000 +; VI-GISEL-NEXT: v_cmp_lt_f32_e32 vcc, v0, v1 +; VI-GISEL-NEXT: v_cndmask_b32_e64 v1, 0, 1, vcc +; VI-GISEL-NEXT: v_lshlrev_b32_e32 v1, 5, v1 +; VI-GISEL-NEXT: v_ldexp_f32 v0, v0, v1 +; VI-GISEL-NEXT: v_log_f32_e32 v0, v0 +; VI-GISEL-NEXT: v_mov_b32_e32 v1, 0x42000000 +; VI-GISEL-NEXT: v_cndmask_b32_e32 v1, 0, v1, vcc +; VI-GISEL-NEXT: v_sub_f32_e32 v0, v0, v1 +; VI-GISEL-NEXT: s_setpc_b64 s[30:31] +; ; GFX900-SDAG-LABEL: v_log2_f32_ninf: ; GFX900-SDAG: ; %bb.0: ; GFX900-SDAG-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) @@ -1978,6 +2237,20 @@ define float @v_log2_f32_ninf(float %in) { ; GFX900-SDAG-NEXT: v_sub_f32_e32 v0, v0, v1 ; GFX900-SDAG-NEXT: s_setpc_b64 s[30:31] ; +; GFX900-GISEL-LABEL: v_log2_f32_ninf: +; GFX900-GISEL: ; %bb.0: +; GFX900-GISEL-NEXT: 
s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX900-GISEL-NEXT: v_mov_b32_e32 v1, 0x800000 +; GFX900-GISEL-NEXT: v_cmp_lt_f32_e32 vcc, v0, v1 +; GFX900-GISEL-NEXT: v_cndmask_b32_e64 v1, 0, 1, vcc +; GFX900-GISEL-NEXT: v_lshlrev_b32_e32 v1, 5, v1 +; GFX900-GISEL-NEXT: v_ldexp_f32 v0, v0, v1 +; GFX900-GISEL-NEXT: v_log_f32_e32 v0, v0 +; GFX900-GISEL-NEXT: v_mov_b32_e32 v1, 0x42000000 +; GFX900-GISEL-NEXT: v_cndmask_b32_e32 v1, 0, v1, vcc +; GFX900-GISEL-NEXT: v_sub_f32_e32 v0, v0, v1 +; GFX900-GISEL-NEXT: s_setpc_b64 s[30:31] +; ; GFX1100-SDAG-LABEL: v_log2_f32_ninf: ; GFX1100-SDAG: ; %bb.0: ; GFX1100-SDAG-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) @@ -1997,10 +2270,12 @@ define float @v_log2_f32_ninf(float %in) { ; GFX1100-GISEL: ; %bb.0: ; GFX1100-GISEL-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) ; GFX1100-GISEL-NEXT: v_cmp_gt_f32_e32 vcc_lo, 0x800000, v0 -; GFX1100-GISEL-NEXT: v_cndmask_b32_e64 v1, 1.0, 0x4f800000, vcc_lo -; GFX1100-GISEL-NEXT: s_delay_alu instid0(VALU_DEP_1) | instskip(SKIP_1) | instid1(VALU_DEP_2) -; GFX1100-GISEL-NEXT: v_mul_f32_e32 v0, v0, v1 +; GFX1100-GISEL-NEXT: v_cndmask_b32_e64 v1, 0, 1, vcc_lo +; GFX1100-GISEL-NEXT: s_delay_alu instid0(VALU_DEP_1) | instskip(NEXT) | instid1(VALU_DEP_1) +; GFX1100-GISEL-NEXT: v_lshlrev_b32_e32 v1, 5, v1 +; GFX1100-GISEL-NEXT: v_ldexp_f32 v0, v0, v1 ; GFX1100-GISEL-NEXT: v_cndmask_b32_e64 v1, 0, 0x42000000, vcc_lo +; GFX1100-GISEL-NEXT: s_delay_alu instid0(VALU_DEP_2) ; GFX1100-GISEL-NEXT: v_log_f32_e32 v0, v0 ; GFX1100-GISEL-NEXT: s_waitcnt_depctr 0xfff ; GFX1100-GISEL-NEXT: v_sub_f32_e32 v0, v0, v1 @@ -2034,19 +2309,19 @@ define float @v_log2_f32_afn(float %in) { ; SI-SDAG-NEXT: v_sub_f32_e32 v0, v0, v1 ; SI-SDAG-NEXT: s_setpc_b64 s[30:31] ; -; GFX689-GISEL-LABEL: v_log2_f32_afn: -; GFX689-GISEL: ; %bb.0: -; GFX689-GISEL-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX689-GISEL-NEXT: v_mov_b32_e32 v1, 0x800000 -; GFX689-GISEL-NEXT: v_mov_b32_e32 v2, 0x4f800000 -; GFX689-GISEL-NEXT: 
v_cmp_lt_f32_e32 vcc, v0, v1 -; GFX689-GISEL-NEXT: v_cndmask_b32_e32 v1, 1.0, v2, vcc -; GFX689-GISEL-NEXT: v_mul_f32_e32 v0, v0, v1 -; GFX689-GISEL-NEXT: v_log_f32_e32 v0, v0 -; GFX689-GISEL-NEXT: v_mov_b32_e32 v1, 0x42000000 -; GFX689-GISEL-NEXT: v_cndmask_b32_e32 v1, 0, v1, vcc -; GFX689-GISEL-NEXT: v_sub_f32_e32 v0, v0, v1 -; GFX689-GISEL-NEXT: s_setpc_b64 s[30:31] +; SI-GISEL-LABEL: v_log2_f32_afn: +; SI-GISEL: ; %bb.0: +; SI-GISEL-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; SI-GISEL-NEXT: v_mov_b32_e32 v1, 0x800000 +; SI-GISEL-NEXT: v_cmp_lt_f32_e32 vcc, v0, v1 +; SI-GISEL-NEXT: v_cndmask_b32_e64 v1, 0, 1, vcc +; SI-GISEL-NEXT: v_lshlrev_b32_e32 v1, 5, v1 +; SI-GISEL-NEXT: v_ldexp_f32_e32 v0, v0, v1 +; SI-GISEL-NEXT: v_log_f32_e32 v0, v0 +; SI-GISEL-NEXT: v_mov_b32_e32 v1, 0x42000000 +; SI-GISEL-NEXT: v_cndmask_b32_e32 v1, 0, v1, vcc +; SI-GISEL-NEXT: v_sub_f32_e32 v0, v0, v1 +; SI-GISEL-NEXT: s_setpc_b64 s[30:31] ; ; VI-SDAG-LABEL: v_log2_f32_afn: ; VI-SDAG: ; %bb.0: @@ -2062,6 +2337,20 @@ define float @v_log2_f32_afn(float %in) { ; VI-SDAG-NEXT: v_sub_f32_e32 v0, v0, v1 ; VI-SDAG-NEXT: s_setpc_b64 s[30:31] ; +; VI-GISEL-LABEL: v_log2_f32_afn: +; VI-GISEL: ; %bb.0: +; VI-GISEL-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; VI-GISEL-NEXT: v_mov_b32_e32 v1, 0x800000 +; VI-GISEL-NEXT: v_cmp_lt_f32_e32 vcc, v0, v1 +; VI-GISEL-NEXT: v_cndmask_b32_e64 v1, 0, 1, vcc +; VI-GISEL-NEXT: v_lshlrev_b32_e32 v1, 5, v1 +; VI-GISEL-NEXT: v_ldexp_f32 v0, v0, v1 +; VI-GISEL-NEXT: v_log_f32_e32 v0, v0 +; VI-GISEL-NEXT: v_mov_b32_e32 v1, 0x42000000 +; VI-GISEL-NEXT: v_cndmask_b32_e32 v1, 0, v1, vcc +; VI-GISEL-NEXT: v_sub_f32_e32 v0, v0, v1 +; VI-GISEL-NEXT: s_setpc_b64 s[30:31] +; ; GFX900-SDAG-LABEL: v_log2_f32_afn: ; GFX900-SDAG: ; %bb.0: ; GFX900-SDAG-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) @@ -2076,6 +2365,20 @@ define float @v_log2_f32_afn(float %in) { ; GFX900-SDAG-NEXT: v_sub_f32_e32 v0, v0, v1 ; GFX900-SDAG-NEXT: s_setpc_b64 s[30:31] ; +; 
GFX900-GISEL-LABEL: v_log2_f32_afn: +; GFX900-GISEL: ; %bb.0: +; GFX900-GISEL-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX900-GISEL-NEXT: v_mov_b32_e32 v1, 0x800000 +; GFX900-GISEL-NEXT: v_cmp_lt_f32_e32 vcc, v0, v1 +; GFX900-GISEL-NEXT: v_cndmask_b32_e64 v1, 0, 1, vcc +; GFX900-GISEL-NEXT: v_lshlrev_b32_e32 v1, 5, v1 +; GFX900-GISEL-NEXT: v_ldexp_f32 v0, v0, v1 +; GFX900-GISEL-NEXT: v_log_f32_e32 v0, v0 +; GFX900-GISEL-NEXT: v_mov_b32_e32 v1, 0x42000000 +; GFX900-GISEL-NEXT: v_cndmask_b32_e32 v1, 0, v1, vcc +; GFX900-GISEL-NEXT: v_sub_f32_e32 v0, v0, v1 +; GFX900-GISEL-NEXT: s_setpc_b64 s[30:31] +; ; GFX1100-SDAG-LABEL: v_log2_f32_afn: ; GFX1100-SDAG: ; %bb.0: ; GFX1100-SDAG-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) @@ -2095,10 +2398,12 @@ define float @v_log2_f32_afn(float %in) { ; GFX1100-GISEL: ; %bb.0: ; GFX1100-GISEL-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) ; GFX1100-GISEL-NEXT: v_cmp_gt_f32_e32 vcc_lo, 0x800000, v0 -; GFX1100-GISEL-NEXT: v_cndmask_b32_e64 v1, 1.0, 0x4f800000, vcc_lo -; GFX1100-GISEL-NEXT: s_delay_alu instid0(VALU_DEP_1) | instskip(SKIP_1) | instid1(VALU_DEP_2) -; GFX1100-GISEL-NEXT: v_mul_f32_e32 v0, v0, v1 +; GFX1100-GISEL-NEXT: v_cndmask_b32_e64 v1, 0, 1, vcc_lo +; GFX1100-GISEL-NEXT: s_delay_alu instid0(VALU_DEP_1) | instskip(NEXT) | instid1(VALU_DEP_1) +; GFX1100-GISEL-NEXT: v_lshlrev_b32_e32 v1, 5, v1 +; GFX1100-GISEL-NEXT: v_ldexp_f32 v0, v0, v1 ; GFX1100-GISEL-NEXT: v_cndmask_b32_e64 v1, 0, 0x42000000, vcc_lo +; GFX1100-GISEL-NEXT: s_delay_alu instid0(VALU_DEP_2) ; GFX1100-GISEL-NEXT: v_log_f32_e32 v0, v0 ; GFX1100-GISEL-NEXT: s_waitcnt_depctr 0xfff ; GFX1100-GISEL-NEXT: v_sub_f32_e32 v0, v0, v1 @@ -2158,19 +2463,19 @@ define float @v_log2_f32_afn_dynamic(float %in) #1 { ; SI-SDAG-NEXT: v_sub_f32_e32 v0, v0, v1 ; SI-SDAG-NEXT: s_setpc_b64 s[30:31] ; -; GFX689-GISEL-LABEL: v_log2_f32_afn_dynamic: -; GFX689-GISEL: ; %bb.0: -; GFX689-GISEL-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX689-GISEL-NEXT: v_mov_b32_e32 
v1, 0x800000 -; GFX689-GISEL-NEXT: v_mov_b32_e32 v2, 0x4f800000 -; GFX689-GISEL-NEXT: v_cmp_lt_f32_e32 vcc, v0, v1 -; GFX689-GISEL-NEXT: v_cndmask_b32_e32 v1, 1.0, v2, vcc -; GFX689-GISEL-NEXT: v_mul_f32_e32 v0, v0, v1 -; GFX689-GISEL-NEXT: v_log_f32_e32 v0, v0 -; GFX689-GISEL-NEXT: v_mov_b32_e32 v1, 0x42000000 -; GFX689-GISEL-NEXT: v_cndmask_b32_e32 v1, 0, v1, vcc -; GFX689-GISEL-NEXT: v_sub_f32_e32 v0, v0, v1 -; GFX689-GISEL-NEXT: s_setpc_b64 s[30:31] +; SI-GISEL-LABEL: v_log2_f32_afn_dynamic: +; SI-GISEL: ; %bb.0: +; SI-GISEL-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; SI-GISEL-NEXT: v_mov_b32_e32 v1, 0x800000 +; SI-GISEL-NEXT: v_cmp_lt_f32_e32 vcc, v0, v1 +; SI-GISEL-NEXT: v_cndmask_b32_e64 v1, 0, 1, vcc +; SI-GISEL-NEXT: v_lshlrev_b32_e32 v1, 5, v1 +; SI-GISEL-NEXT: v_ldexp_f32_e32 v0, v0, v1 +; SI-GISEL-NEXT: v_log_f32_e32 v0, v0 +; SI-GISEL-NEXT: v_mov_b32_e32 v1, 0x42000000 +; SI-GISEL-NEXT: v_cndmask_b32_e32 v1, 0, v1, vcc +; SI-GISEL-NEXT: v_sub_f32_e32 v0, v0, v1 +; SI-GISEL-NEXT: s_setpc_b64 s[30:31] ; ; VI-SDAG-LABEL: v_log2_f32_afn_dynamic: ; VI-SDAG: ; %bb.0: @@ -2186,6 +2491,20 @@ define float @v_log2_f32_afn_dynamic(float %in) #1 { ; VI-SDAG-NEXT: v_sub_f32_e32 v0, v0, v1 ; VI-SDAG-NEXT: s_setpc_b64 s[30:31] ; +; VI-GISEL-LABEL: v_log2_f32_afn_dynamic: +; VI-GISEL: ; %bb.0: +; VI-GISEL-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; VI-GISEL-NEXT: v_mov_b32_e32 v1, 0x800000 +; VI-GISEL-NEXT: v_cmp_lt_f32_e32 vcc, v0, v1 +; VI-GISEL-NEXT: v_cndmask_b32_e64 v1, 0, 1, vcc +; VI-GISEL-NEXT: v_lshlrev_b32_e32 v1, 5, v1 +; VI-GISEL-NEXT: v_ldexp_f32 v0, v0, v1 +; VI-GISEL-NEXT: v_log_f32_e32 v0, v0 +; VI-GISEL-NEXT: v_mov_b32_e32 v1, 0x42000000 +; VI-GISEL-NEXT: v_cndmask_b32_e32 v1, 0, v1, vcc +; VI-GISEL-NEXT: v_sub_f32_e32 v0, v0, v1 +; VI-GISEL-NEXT: s_setpc_b64 s[30:31] +; ; GFX900-SDAG-LABEL: v_log2_f32_afn_dynamic: ; GFX900-SDAG: ; %bb.0: ; GFX900-SDAG-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) @@ -2200,6 +2519,20 @@ define float 
@v_log2_f32_afn_dynamic(float %in) #1 { ; GFX900-SDAG-NEXT: v_sub_f32_e32 v0, v0, v1 ; GFX900-SDAG-NEXT: s_setpc_b64 s[30:31] ; +; GFX900-GISEL-LABEL: v_log2_f32_afn_dynamic: +; GFX900-GISEL: ; %bb.0: +; GFX900-GISEL-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX900-GISEL-NEXT: v_mov_b32_e32 v1, 0x800000 +; GFX900-GISEL-NEXT: v_cmp_lt_f32_e32 vcc, v0, v1 +; GFX900-GISEL-NEXT: v_cndmask_b32_e64 v1, 0, 1, vcc +; GFX900-GISEL-NEXT: v_lshlrev_b32_e32 v1, 5, v1 +; GFX900-GISEL-NEXT: v_ldexp_f32 v0, v0, v1 +; GFX900-GISEL-NEXT: v_log_f32_e32 v0, v0 +; GFX900-GISEL-NEXT: v_mov_b32_e32 v1, 0x42000000 +; GFX900-GISEL-NEXT: v_cndmask_b32_e32 v1, 0, v1, vcc +; GFX900-GISEL-NEXT: v_sub_f32_e32 v0, v0, v1 +; GFX900-GISEL-NEXT: s_setpc_b64 s[30:31] +; ; GFX1100-SDAG-LABEL: v_log2_f32_afn_dynamic: ; GFX1100-SDAG: ; %bb.0: ; GFX1100-SDAG-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) @@ -2219,10 +2552,12 @@ define float @v_log2_f32_afn_dynamic(float %in) #1 { ; GFX1100-GISEL: ; %bb.0: ; GFX1100-GISEL-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) ; GFX1100-GISEL-NEXT: v_cmp_gt_f32_e32 vcc_lo, 0x800000, v0 -; GFX1100-GISEL-NEXT: v_cndmask_b32_e64 v1, 1.0, 0x4f800000, vcc_lo -; GFX1100-GISEL-NEXT: s_delay_alu instid0(VALU_DEP_1) | instskip(SKIP_1) | instid1(VALU_DEP_2) -; GFX1100-GISEL-NEXT: v_mul_f32_e32 v0, v0, v1 +; GFX1100-GISEL-NEXT: v_cndmask_b32_e64 v1, 0, 1, vcc_lo +; GFX1100-GISEL-NEXT: s_delay_alu instid0(VALU_DEP_1) | instskip(NEXT) | instid1(VALU_DEP_1) +; GFX1100-GISEL-NEXT: v_lshlrev_b32_e32 v1, 5, v1 +; GFX1100-GISEL-NEXT: v_ldexp_f32 v0, v0, v1 ; GFX1100-GISEL-NEXT: v_cndmask_b32_e64 v1, 0, 0x42000000, vcc_lo +; GFX1100-GISEL-NEXT: s_delay_alu instid0(VALU_DEP_2) ; GFX1100-GISEL-NEXT: v_log_f32_e32 v0, v0 ; GFX1100-GISEL-NEXT: s_waitcnt_depctr 0xfff ; GFX1100-GISEL-NEXT: v_sub_f32_e32 v0, v0, v1 @@ -2256,19 +2591,19 @@ define float @v_fabs_log2_f32_afn(float %in) { ; SI-SDAG-NEXT: v_sub_f32_e32 v0, v0, v1 ; SI-SDAG-NEXT: s_setpc_b64 s[30:31] ; -; 
GFX689-GISEL-LABEL: v_fabs_log2_f32_afn: -; GFX689-GISEL: ; %bb.0: -; GFX689-GISEL-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX689-GISEL-NEXT: v_mov_b32_e32 v1, 0x800000 -; GFX689-GISEL-NEXT: v_mov_b32_e32 v2, 0x4f800000 -; GFX689-GISEL-NEXT: v_cmp_lt_f32_e64 vcc, |v0|, v1 -; GFX689-GISEL-NEXT: v_cndmask_b32_e32 v1, 1.0, v2, vcc -; GFX689-GISEL-NEXT: v_mul_f32_e64 v0, |v0|, v1 -; GFX689-GISEL-NEXT: v_log_f32_e32 v0, v0 -; GFX689-GISEL-NEXT: v_mov_b32_e32 v1, 0x42000000 -; GFX689-GISEL-NEXT: v_cndmask_b32_e32 v1, 0, v1, vcc -; GFX689-GISEL-NEXT: v_sub_f32_e32 v0, v0, v1 -; GFX689-GISEL-NEXT: s_setpc_b64 s[30:31] +; SI-GISEL-LABEL: v_fabs_log2_f32_afn: +; SI-GISEL: ; %bb.0: +; SI-GISEL-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; SI-GISEL-NEXT: v_mov_b32_e32 v1, 0x800000 +; SI-GISEL-NEXT: v_cmp_lt_f32_e64 vcc, |v0|, v1 +; SI-GISEL-NEXT: v_cndmask_b32_e64 v1, 0, 1, vcc +; SI-GISEL-NEXT: v_lshlrev_b32_e32 v1, 5, v1 +; SI-GISEL-NEXT: v_ldexp_f32_e64 v0, |v0|, v1 +; SI-GISEL-NEXT: v_log_f32_e32 v0, v0 +; SI-GISEL-NEXT: v_mov_b32_e32 v1, 0x42000000 +; SI-GISEL-NEXT: v_cndmask_b32_e32 v1, 0, v1, vcc +; SI-GISEL-NEXT: v_sub_f32_e32 v0, v0, v1 +; SI-GISEL-NEXT: s_setpc_b64 s[30:31] ; ; VI-SDAG-LABEL: v_fabs_log2_f32_afn: ; VI-SDAG: ; %bb.0: @@ -2284,6 +2619,20 @@ define float @v_fabs_log2_f32_afn(float %in) { ; VI-SDAG-NEXT: v_sub_f32_e32 v0, v0, v1 ; VI-SDAG-NEXT: s_setpc_b64 s[30:31] ; +; VI-GISEL-LABEL: v_fabs_log2_f32_afn: +; VI-GISEL: ; %bb.0: +; VI-GISEL-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; VI-GISEL-NEXT: v_mov_b32_e32 v1, 0x800000 +; VI-GISEL-NEXT: v_cmp_lt_f32_e64 vcc, |v0|, v1 +; VI-GISEL-NEXT: v_cndmask_b32_e64 v1, 0, 1, vcc +; VI-GISEL-NEXT: v_lshlrev_b32_e32 v1, 5, v1 +; VI-GISEL-NEXT: v_ldexp_f32 v0, |v0|, v1 +; VI-GISEL-NEXT: v_log_f32_e32 v0, v0 +; VI-GISEL-NEXT: v_mov_b32_e32 v1, 0x42000000 +; VI-GISEL-NEXT: v_cndmask_b32_e32 v1, 0, v1, vcc +; VI-GISEL-NEXT: v_sub_f32_e32 v0, v0, v1 +; VI-GISEL-NEXT: s_setpc_b64 s[30:31] +; ; 
GFX900-SDAG-LABEL: v_fabs_log2_f32_afn: ; GFX900-SDAG: ; %bb.0: ; GFX900-SDAG-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) @@ -2298,6 +2647,20 @@ define float @v_fabs_log2_f32_afn(float %in) { ; GFX900-SDAG-NEXT: v_sub_f32_e32 v0, v0, v1 ; GFX900-SDAG-NEXT: s_setpc_b64 s[30:31] ; +; GFX900-GISEL-LABEL: v_fabs_log2_f32_afn: +; GFX900-GISEL: ; %bb.0: +; GFX900-GISEL-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX900-GISEL-NEXT: v_mov_b32_e32 v1, 0x800000 +; GFX900-GISEL-NEXT: v_cmp_lt_f32_e64 vcc, |v0|, v1 +; GFX900-GISEL-NEXT: v_cndmask_b32_e64 v1, 0, 1, vcc +; GFX900-GISEL-NEXT: v_lshlrev_b32_e32 v1, 5, v1 +; GFX900-GISEL-NEXT: v_ldexp_f32 v0, |v0|, v1 +; GFX900-GISEL-NEXT: v_log_f32_e32 v0, v0 +; GFX900-GISEL-NEXT: v_mov_b32_e32 v1, 0x42000000 +; GFX900-GISEL-NEXT: v_cndmask_b32_e32 v1, 0, v1, vcc +; GFX900-GISEL-NEXT: v_sub_f32_e32 v0, v0, v1 +; GFX900-GISEL-NEXT: s_setpc_b64 s[30:31] +; ; GFX1100-SDAG-LABEL: v_fabs_log2_f32_afn: ; GFX1100-SDAG: ; %bb.0: ; GFX1100-SDAG-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) @@ -2318,10 +2681,11 @@ define float @v_fabs_log2_f32_afn(float %in) { ; GFX1100-GISEL-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) ; GFX1100-GISEL-NEXT: v_cmp_gt_f32_e64 s0, 0x800000, |v0| ; GFX1100-GISEL-NEXT: s_delay_alu instid0(VALU_DEP_1) | instskip(NEXT) | instid1(VALU_DEP_1) -; GFX1100-GISEL-NEXT: v_cndmask_b32_e64 v1, 1.0, 0x4f800000, s0 -; GFX1100-GISEL-NEXT: v_mul_f32_e64 v0, |v0|, v1 +; GFX1100-GISEL-NEXT: v_cndmask_b32_e64 v1, 0, 1, s0 +; GFX1100-GISEL-NEXT: v_lshlrev_b32_e32 v1, 5, v1 +; GFX1100-GISEL-NEXT: s_delay_alu instid0(VALU_DEP_1) | instskip(SKIP_1) | instid1(VALU_DEP_2) +; GFX1100-GISEL-NEXT: v_ldexp_f32 v0, |v0|, v1 ; GFX1100-GISEL-NEXT: v_cndmask_b32_e64 v1, 0, 0x42000000, s0 -; GFX1100-GISEL-NEXT: s_delay_alu instid0(VALU_DEP_2) ; GFX1100-GISEL-NEXT: v_log_f32_e32 v0, v0 ; GFX1100-GISEL-NEXT: s_waitcnt_depctr 0xfff ; GFX1100-GISEL-NEXT: v_sub_f32_e32 v0, v0, v1 @@ -2382,19 +2746,19 @@ define float @v_log2_f32_nnan(float 
%in) { ; SI-SDAG-NEXT: v_sub_f32_e32 v0, v0, v1 ; SI-SDAG-NEXT: s_setpc_b64 s[30:31] ; -; GFX689-GISEL-LABEL: v_log2_f32_nnan: -; GFX689-GISEL: ; %bb.0: -; GFX689-GISEL-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX689-GISEL-NEXT: v_mov_b32_e32 v1, 0x800000 -; GFX689-GISEL-NEXT: v_mov_b32_e32 v2, 0x4f800000 -; GFX689-GISEL-NEXT: v_cmp_lt_f32_e32 vcc, v0, v1 -; GFX689-GISEL-NEXT: v_cndmask_b32_e32 v1, 1.0, v2, vcc -; GFX689-GISEL-NEXT: v_mul_f32_e32 v0, v0, v1 -; GFX689-GISEL-NEXT: v_log_f32_e32 v0, v0 -; GFX689-GISEL-NEXT: v_mov_b32_e32 v1, 0x42000000 -; GFX689-GISEL-NEXT: v_cndmask_b32_e32 v1, 0, v1, vcc -; GFX689-GISEL-NEXT: v_sub_f32_e32 v0, v0, v1 -; GFX689-GISEL-NEXT: s_setpc_b64 s[30:31] +; SI-GISEL-LABEL: v_log2_f32_nnan: +; SI-GISEL: ; %bb.0: +; SI-GISEL-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; SI-GISEL-NEXT: v_mov_b32_e32 v1, 0x800000 +; SI-GISEL-NEXT: v_cmp_lt_f32_e32 vcc, v0, v1 +; SI-GISEL-NEXT: v_cndmask_b32_e64 v1, 0, 1, vcc +; SI-GISEL-NEXT: v_lshlrev_b32_e32 v1, 5, v1 +; SI-GISEL-NEXT: v_ldexp_f32_e32 v0, v0, v1 +; SI-GISEL-NEXT: v_log_f32_e32 v0, v0 +; SI-GISEL-NEXT: v_mov_b32_e32 v1, 0x42000000 +; SI-GISEL-NEXT: v_cndmask_b32_e32 v1, 0, v1, vcc +; SI-GISEL-NEXT: v_sub_f32_e32 v0, v0, v1 +; SI-GISEL-NEXT: s_setpc_b64 s[30:31] ; ; VI-SDAG-LABEL: v_log2_f32_nnan: ; VI-SDAG: ; %bb.0: @@ -2410,6 +2774,20 @@ define float @v_log2_f32_nnan(float %in) { ; VI-SDAG-NEXT: v_sub_f32_e32 v0, v0, v1 ; VI-SDAG-NEXT: s_setpc_b64 s[30:31] ; +; VI-GISEL-LABEL: v_log2_f32_nnan: +; VI-GISEL: ; %bb.0: +; VI-GISEL-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; VI-GISEL-NEXT: v_mov_b32_e32 v1, 0x800000 +; VI-GISEL-NEXT: v_cmp_lt_f32_e32 vcc, v0, v1 +; VI-GISEL-NEXT: v_cndmask_b32_e64 v1, 0, 1, vcc +; VI-GISEL-NEXT: v_lshlrev_b32_e32 v1, 5, v1 +; VI-GISEL-NEXT: v_ldexp_f32 v0, v0, v1 +; VI-GISEL-NEXT: v_log_f32_e32 v0, v0 +; VI-GISEL-NEXT: v_mov_b32_e32 v1, 0x42000000 +; VI-GISEL-NEXT: v_cndmask_b32_e32 v1, 0, v1, vcc +; VI-GISEL-NEXT: v_sub_f32_e32 v0, 
v0, v1 +; VI-GISEL-NEXT: s_setpc_b64 s[30:31] +; ; GFX900-SDAG-LABEL: v_log2_f32_nnan: ; GFX900-SDAG: ; %bb.0: ; GFX900-SDAG-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) @@ -2424,6 +2802,20 @@ define float @v_log2_f32_nnan(float %in) { ; GFX900-SDAG-NEXT: v_sub_f32_e32 v0, v0, v1 ; GFX900-SDAG-NEXT: s_setpc_b64 s[30:31] ; +; GFX900-GISEL-LABEL: v_log2_f32_nnan: +; GFX900-GISEL: ; %bb.0: +; GFX900-GISEL-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX900-GISEL-NEXT: v_mov_b32_e32 v1, 0x800000 +; GFX900-GISEL-NEXT: v_cmp_lt_f32_e32 vcc, v0, v1 +; GFX900-GISEL-NEXT: v_cndmask_b32_e64 v1, 0, 1, vcc +; GFX900-GISEL-NEXT: v_lshlrev_b32_e32 v1, 5, v1 +; GFX900-GISEL-NEXT: v_ldexp_f32 v0, v0, v1 +; GFX900-GISEL-NEXT: v_log_f32_e32 v0, v0 +; GFX900-GISEL-NEXT: v_mov_b32_e32 v1, 0x42000000 +; GFX900-GISEL-NEXT: v_cndmask_b32_e32 v1, 0, v1, vcc +; GFX900-GISEL-NEXT: v_sub_f32_e32 v0, v0, v1 +; GFX900-GISEL-NEXT: s_setpc_b64 s[30:31] +; ; GFX1100-SDAG-LABEL: v_log2_f32_nnan: ; GFX1100-SDAG: ; %bb.0: ; GFX1100-SDAG-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) @@ -2443,10 +2835,12 @@ define float @v_log2_f32_nnan(float %in) { ; GFX1100-GISEL: ; %bb.0: ; GFX1100-GISEL-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) ; GFX1100-GISEL-NEXT: v_cmp_gt_f32_e32 vcc_lo, 0x800000, v0 -; GFX1100-GISEL-NEXT: v_cndmask_b32_e64 v1, 1.0, 0x4f800000, vcc_lo -; GFX1100-GISEL-NEXT: s_delay_alu instid0(VALU_DEP_1) | instskip(SKIP_1) | instid1(VALU_DEP_2) -; GFX1100-GISEL-NEXT: v_mul_f32_e32 v0, v0, v1 +; GFX1100-GISEL-NEXT: v_cndmask_b32_e64 v1, 0, 1, vcc_lo +; GFX1100-GISEL-NEXT: s_delay_alu instid0(VALU_DEP_1) | instskip(NEXT) | instid1(VALU_DEP_1) +; GFX1100-GISEL-NEXT: v_lshlrev_b32_e32 v1, 5, v1 +; GFX1100-GISEL-NEXT: v_ldexp_f32 v0, v0, v1 ; GFX1100-GISEL-NEXT: v_cndmask_b32_e64 v1, 0, 0x42000000, vcc_lo +; GFX1100-GISEL-NEXT: s_delay_alu instid0(VALU_DEP_2) ; GFX1100-GISEL-NEXT: v_log_f32_e32 v0, v0 ; GFX1100-GISEL-NEXT: s_waitcnt_depctr 0xfff ; GFX1100-GISEL-NEXT: v_sub_f32_e32 v0, v0, 
v1 @@ -2506,19 +2900,19 @@ define float @v_log2_f32_nnan_dynamic(float %in) #1 { ; SI-SDAG-NEXT: v_sub_f32_e32 v0, v0, v1 ; SI-SDAG-NEXT: s_setpc_b64 s[30:31] ; -; GFX689-GISEL-LABEL: v_log2_f32_nnan_dynamic: -; GFX689-GISEL: ; %bb.0: -; GFX689-GISEL-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX689-GISEL-NEXT: v_mov_b32_e32 v1, 0x800000 -; GFX689-GISEL-NEXT: v_mov_b32_e32 v2, 0x4f800000 -; GFX689-GISEL-NEXT: v_cmp_lt_f32_e32 vcc, v0, v1 -; GFX689-GISEL-NEXT: v_cndmask_b32_e32 v1, 1.0, v2, vcc -; GFX689-GISEL-NEXT: v_mul_f32_e32 v0, v0, v1 -; GFX689-GISEL-NEXT: v_log_f32_e32 v0, v0 -; GFX689-GISEL-NEXT: v_mov_b32_e32 v1, 0x42000000 -; GFX689-GISEL-NEXT: v_cndmask_b32_e32 v1, 0, v1, vcc -; GFX689-GISEL-NEXT: v_sub_f32_e32 v0, v0, v1 -; GFX689-GISEL-NEXT: s_setpc_b64 s[30:31] +; SI-GISEL-LABEL: v_log2_f32_nnan_dynamic: +; SI-GISEL: ; %bb.0: +; SI-GISEL-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; SI-GISEL-NEXT: v_mov_b32_e32 v1, 0x800000 +; SI-GISEL-NEXT: v_cmp_lt_f32_e32 vcc, v0, v1 +; SI-GISEL-NEXT: v_cndmask_b32_e64 v1, 0, 1, vcc +; SI-GISEL-NEXT: v_lshlrev_b32_e32 v1, 5, v1 +; SI-GISEL-NEXT: v_ldexp_f32_e32 v0, v0, v1 +; SI-GISEL-NEXT: v_log_f32_e32 v0, v0 +; SI-GISEL-NEXT: v_mov_b32_e32 v1, 0x42000000 +; SI-GISEL-NEXT: v_cndmask_b32_e32 v1, 0, v1, vcc +; SI-GISEL-NEXT: v_sub_f32_e32 v0, v0, v1 +; SI-GISEL-NEXT: s_setpc_b64 s[30:31] ; ; VI-SDAG-LABEL: v_log2_f32_nnan_dynamic: ; VI-SDAG: ; %bb.0: @@ -2534,6 +2928,20 @@ define float @v_log2_f32_nnan_dynamic(float %in) #1 { ; VI-SDAG-NEXT: v_sub_f32_e32 v0, v0, v1 ; VI-SDAG-NEXT: s_setpc_b64 s[30:31] ; +; VI-GISEL-LABEL: v_log2_f32_nnan_dynamic: +; VI-GISEL: ; %bb.0: +; VI-GISEL-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; VI-GISEL-NEXT: v_mov_b32_e32 v1, 0x800000 +; VI-GISEL-NEXT: v_cmp_lt_f32_e32 vcc, v0, v1 +; VI-GISEL-NEXT: v_cndmask_b32_e64 v1, 0, 1, vcc +; VI-GISEL-NEXT: v_lshlrev_b32_e32 v1, 5, v1 +; VI-GISEL-NEXT: v_ldexp_f32 v0, v0, v1 +; VI-GISEL-NEXT: v_log_f32_e32 v0, v0 +; VI-GISEL-NEXT: 
v_mov_b32_e32 v1, 0x42000000 +; VI-GISEL-NEXT: v_cndmask_b32_e32 v1, 0, v1, vcc +; VI-GISEL-NEXT: v_sub_f32_e32 v0, v0, v1 +; VI-GISEL-NEXT: s_setpc_b64 s[30:31] +; ; GFX900-SDAG-LABEL: v_log2_f32_nnan_dynamic: ; GFX900-SDAG: ; %bb.0: ; GFX900-SDAG-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) @@ -2548,6 +2956,20 @@ define float @v_log2_f32_nnan_dynamic(float %in) #1 { ; GFX900-SDAG-NEXT: v_sub_f32_e32 v0, v0, v1 ; GFX900-SDAG-NEXT: s_setpc_b64 s[30:31] ; +; GFX900-GISEL-LABEL: v_log2_f32_nnan_dynamic: +; GFX900-GISEL: ; %bb.0: +; GFX900-GISEL-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX900-GISEL-NEXT: v_mov_b32_e32 v1, 0x800000 +; GFX900-GISEL-NEXT: v_cmp_lt_f32_e32 vcc, v0, v1 +; GFX900-GISEL-NEXT: v_cndmask_b32_e64 v1, 0, 1, vcc +; GFX900-GISEL-NEXT: v_lshlrev_b32_e32 v1, 5, v1 +; GFX900-GISEL-NEXT: v_ldexp_f32 v0, v0, v1 +; GFX900-GISEL-NEXT: v_log_f32_e32 v0, v0 +; GFX900-GISEL-NEXT: v_mov_b32_e32 v1, 0x42000000 +; GFX900-GISEL-NEXT: v_cndmask_b32_e32 v1, 0, v1, vcc +; GFX900-GISEL-NEXT: v_sub_f32_e32 v0, v0, v1 +; GFX900-GISEL-NEXT: s_setpc_b64 s[30:31] +; ; GFX1100-SDAG-LABEL: v_log2_f32_nnan_dynamic: ; GFX1100-SDAG: ; %bb.0: ; GFX1100-SDAG-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) @@ -2567,10 +2989,12 @@ define float @v_log2_f32_nnan_dynamic(float %in) #1 { ; GFX1100-GISEL: ; %bb.0: ; GFX1100-GISEL-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) ; GFX1100-GISEL-NEXT: v_cmp_gt_f32_e32 vcc_lo, 0x800000, v0 -; GFX1100-GISEL-NEXT: v_cndmask_b32_e64 v1, 1.0, 0x4f800000, vcc_lo -; GFX1100-GISEL-NEXT: s_delay_alu instid0(VALU_DEP_1) | instskip(SKIP_1) | instid1(VALU_DEP_2) -; GFX1100-GISEL-NEXT: v_mul_f32_e32 v0, v0, v1 +; GFX1100-GISEL-NEXT: v_cndmask_b32_e64 v1, 0, 1, vcc_lo +; GFX1100-GISEL-NEXT: s_delay_alu instid0(VALU_DEP_1) | instskip(NEXT) | instid1(VALU_DEP_1) +; GFX1100-GISEL-NEXT: v_lshlrev_b32_e32 v1, 5, v1 +; GFX1100-GISEL-NEXT: v_ldexp_f32 v0, v0, v1 ; GFX1100-GISEL-NEXT: v_cndmask_b32_e64 v1, 0, 0x42000000, vcc_lo +; GFX1100-GISEL-NEXT: 
s_delay_alu instid0(VALU_DEP_2) ; GFX1100-GISEL-NEXT: v_log_f32_e32 v0, v0 ; GFX1100-GISEL-NEXT: s_waitcnt_depctr 0xfff ; GFX1100-GISEL-NEXT: v_sub_f32_e32 v0, v0, v1 @@ -2630,19 +3054,19 @@ define float @v_log2_f32_ninf_dynamic(float %in) #1 { ; SI-SDAG-NEXT: v_sub_f32_e32 v0, v0, v1 ; SI-SDAG-NEXT: s_setpc_b64 s[30:31] ; -; GFX689-GISEL-LABEL: v_log2_f32_ninf_dynamic: -; GFX689-GISEL: ; %bb.0: -; GFX689-GISEL-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX689-GISEL-NEXT: v_mov_b32_e32 v1, 0x800000 -; GFX689-GISEL-NEXT: v_mov_b32_e32 v2, 0x4f800000 -; GFX689-GISEL-NEXT: v_cmp_lt_f32_e32 vcc, v0, v1 -; GFX689-GISEL-NEXT: v_cndmask_b32_e32 v1, 1.0, v2, vcc -; GFX689-GISEL-NEXT: v_mul_f32_e32 v0, v0, v1 -; GFX689-GISEL-NEXT: v_log_f32_e32 v0, v0 -; GFX689-GISEL-NEXT: v_mov_b32_e32 v1, 0x42000000 -; GFX689-GISEL-NEXT: v_cndmask_b32_e32 v1, 0, v1, vcc -; GFX689-GISEL-NEXT: v_sub_f32_e32 v0, v0, v1 -; GFX689-GISEL-NEXT: s_setpc_b64 s[30:31] +; SI-GISEL-LABEL: v_log2_f32_ninf_dynamic: +; SI-GISEL: ; %bb.0: +; SI-GISEL-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; SI-GISEL-NEXT: v_mov_b32_e32 v1, 0x800000 +; SI-GISEL-NEXT: v_cmp_lt_f32_e32 vcc, v0, v1 +; SI-GISEL-NEXT: v_cndmask_b32_e64 v1, 0, 1, vcc +; SI-GISEL-NEXT: v_lshlrev_b32_e32 v1, 5, v1 +; SI-GISEL-NEXT: v_ldexp_f32_e32 v0, v0, v1 +; SI-GISEL-NEXT: v_log_f32_e32 v0, v0 +; SI-GISEL-NEXT: v_mov_b32_e32 v1, 0x42000000 +; SI-GISEL-NEXT: v_cndmask_b32_e32 v1, 0, v1, vcc +; SI-GISEL-NEXT: v_sub_f32_e32 v0, v0, v1 +; SI-GISEL-NEXT: s_setpc_b64 s[30:31] ; ; VI-SDAG-LABEL: v_log2_f32_ninf_dynamic: ; VI-SDAG: ; %bb.0: @@ -2658,6 +3082,20 @@ define float @v_log2_f32_ninf_dynamic(float %in) #1 { ; VI-SDAG-NEXT: v_sub_f32_e32 v0, v0, v1 ; VI-SDAG-NEXT: s_setpc_b64 s[30:31] ; +; VI-GISEL-LABEL: v_log2_f32_ninf_dynamic: +; VI-GISEL: ; %bb.0: +; VI-GISEL-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; VI-GISEL-NEXT: v_mov_b32_e32 v1, 0x800000 +; VI-GISEL-NEXT: v_cmp_lt_f32_e32 vcc, v0, v1 +; VI-GISEL-NEXT: 
v_cndmask_b32_e64 v1, 0, 1, vcc +; VI-GISEL-NEXT: v_lshlrev_b32_e32 v1, 5, v1 +; VI-GISEL-NEXT: v_ldexp_f32 v0, v0, v1 +; VI-GISEL-NEXT: v_log_f32_e32 v0, v0 +; VI-GISEL-NEXT: v_mov_b32_e32 v1, 0x42000000 +; VI-GISEL-NEXT: v_cndmask_b32_e32 v1, 0, v1, vcc +; VI-GISEL-NEXT: v_sub_f32_e32 v0, v0, v1 +; VI-GISEL-NEXT: s_setpc_b64 s[30:31] +; ; GFX900-SDAG-LABEL: v_log2_f32_ninf_dynamic: ; GFX900-SDAG: ; %bb.0: ; GFX900-SDAG-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) @@ -2672,6 +3110,20 @@ define float @v_log2_f32_ninf_dynamic(float %in) #1 { ; GFX900-SDAG-NEXT: v_sub_f32_e32 v0, v0, v1 ; GFX900-SDAG-NEXT: s_setpc_b64 s[30:31] ; +; GFX900-GISEL-LABEL: v_log2_f32_ninf_dynamic: +; GFX900-GISEL: ; %bb.0: +; GFX900-GISEL-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX900-GISEL-NEXT: v_mov_b32_e32 v1, 0x800000 +; GFX900-GISEL-NEXT: v_cmp_lt_f32_e32 vcc, v0, v1 +; GFX900-GISEL-NEXT: v_cndmask_b32_e64 v1, 0, 1, vcc +; GFX900-GISEL-NEXT: v_lshlrev_b32_e32 v1, 5, v1 +; GFX900-GISEL-NEXT: v_ldexp_f32 v0, v0, v1 +; GFX900-GISEL-NEXT: v_log_f32_e32 v0, v0 +; GFX900-GISEL-NEXT: v_mov_b32_e32 v1, 0x42000000 +; GFX900-GISEL-NEXT: v_cndmask_b32_e32 v1, 0, v1, vcc +; GFX900-GISEL-NEXT: v_sub_f32_e32 v0, v0, v1 +; GFX900-GISEL-NEXT: s_setpc_b64 s[30:31] +; ; GFX1100-SDAG-LABEL: v_log2_f32_ninf_dynamic: ; GFX1100-SDAG: ; %bb.0: ; GFX1100-SDAG-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) @@ -2691,10 +3143,12 @@ define float @v_log2_f32_ninf_dynamic(float %in) #1 { ; GFX1100-GISEL: ; %bb.0: ; GFX1100-GISEL-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) ; GFX1100-GISEL-NEXT: v_cmp_gt_f32_e32 vcc_lo, 0x800000, v0 -; GFX1100-GISEL-NEXT: v_cndmask_b32_e64 v1, 1.0, 0x4f800000, vcc_lo -; GFX1100-GISEL-NEXT: s_delay_alu instid0(VALU_DEP_1) | instskip(SKIP_1) | instid1(VALU_DEP_2) -; GFX1100-GISEL-NEXT: v_mul_f32_e32 v0, v0, v1 +; GFX1100-GISEL-NEXT: v_cndmask_b32_e64 v1, 0, 1, vcc_lo +; GFX1100-GISEL-NEXT: s_delay_alu instid0(VALU_DEP_1) | instskip(NEXT) | instid1(VALU_DEP_1) +; 
GFX1100-GISEL-NEXT: v_lshlrev_b32_e32 v1, 5, v1 +; GFX1100-GISEL-NEXT: v_ldexp_f32 v0, v0, v1 ; GFX1100-GISEL-NEXT: v_cndmask_b32_e64 v1, 0, 0x42000000, vcc_lo +; GFX1100-GISEL-NEXT: s_delay_alu instid0(VALU_DEP_2) ; GFX1100-GISEL-NEXT: v_log_f32_e32 v0, v0 ; GFX1100-GISEL-NEXT: s_waitcnt_depctr 0xfff ; GFX1100-GISEL-NEXT: v_sub_f32_e32 v0, v0, v1 @@ -2728,19 +3182,19 @@ define float @v_log2_f32_nnan_ninf(float %in) { ; SI-SDAG-NEXT: v_sub_f32_e32 v0, v0, v1 ; SI-SDAG-NEXT: s_setpc_b64 s[30:31] ; -; GFX689-GISEL-LABEL: v_log2_f32_nnan_ninf: -; GFX689-GISEL: ; %bb.0: -; GFX689-GISEL-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX689-GISEL-NEXT: v_mov_b32_e32 v1, 0x800000 -; GFX689-GISEL-NEXT: v_mov_b32_e32 v2, 0x4f800000 -; GFX689-GISEL-NEXT: v_cmp_lt_f32_e32 vcc, v0, v1 -; GFX689-GISEL-NEXT: v_cndmask_b32_e32 v1, 1.0, v2, vcc -; GFX689-GISEL-NEXT: v_mul_f32_e32 v0, v0, v1 -; GFX689-GISEL-NEXT: v_log_f32_e32 v0, v0 -; GFX689-GISEL-NEXT: v_mov_b32_e32 v1, 0x42000000 -; GFX689-GISEL-NEXT: v_cndmask_b32_e32 v1, 0, v1, vcc -; GFX689-GISEL-NEXT: v_sub_f32_e32 v0, v0, v1 -; GFX689-GISEL-NEXT: s_setpc_b64 s[30:31] +; SI-GISEL-LABEL: v_log2_f32_nnan_ninf: +; SI-GISEL: ; %bb.0: +; SI-GISEL-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; SI-GISEL-NEXT: v_mov_b32_e32 v1, 0x800000 +; SI-GISEL-NEXT: v_cmp_lt_f32_e32 vcc, v0, v1 +; SI-GISEL-NEXT: v_cndmask_b32_e64 v1, 0, 1, vcc +; SI-GISEL-NEXT: v_lshlrev_b32_e32 v1, 5, v1 +; SI-GISEL-NEXT: v_ldexp_f32_e32 v0, v0, v1 +; SI-GISEL-NEXT: v_log_f32_e32 v0, v0 +; SI-GISEL-NEXT: v_mov_b32_e32 v1, 0x42000000 +; SI-GISEL-NEXT: v_cndmask_b32_e32 v1, 0, v1, vcc +; SI-GISEL-NEXT: v_sub_f32_e32 v0, v0, v1 +; SI-GISEL-NEXT: s_setpc_b64 s[30:31] ; ; VI-SDAG-LABEL: v_log2_f32_nnan_ninf: ; VI-SDAG: ; %bb.0: @@ -2756,6 +3210,20 @@ define float @v_log2_f32_nnan_ninf(float %in) { ; VI-SDAG-NEXT: v_sub_f32_e32 v0, v0, v1 ; VI-SDAG-NEXT: s_setpc_b64 s[30:31] ; +; VI-GISEL-LABEL: v_log2_f32_nnan_ninf: +; VI-GISEL: ; %bb.0: +; VI-GISEL-NEXT: 
s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; VI-GISEL-NEXT: v_mov_b32_e32 v1, 0x800000 +; VI-GISEL-NEXT: v_cmp_lt_f32_e32 vcc, v0, v1 +; VI-GISEL-NEXT: v_cndmask_b32_e64 v1, 0, 1, vcc +; VI-GISEL-NEXT: v_lshlrev_b32_e32 v1, 5, v1 +; VI-GISEL-NEXT: v_ldexp_f32 v0, v0, v1 +; VI-GISEL-NEXT: v_log_f32_e32 v0, v0 +; VI-GISEL-NEXT: v_mov_b32_e32 v1, 0x42000000 +; VI-GISEL-NEXT: v_cndmask_b32_e32 v1, 0, v1, vcc +; VI-GISEL-NEXT: v_sub_f32_e32 v0, v0, v1 +; VI-GISEL-NEXT: s_setpc_b64 s[30:31] +; ; GFX900-SDAG-LABEL: v_log2_f32_nnan_ninf: ; GFX900-SDAG: ; %bb.0: ; GFX900-SDAG-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) @@ -2770,6 +3238,20 @@ define float @v_log2_f32_nnan_ninf(float %in) { ; GFX900-SDAG-NEXT: v_sub_f32_e32 v0, v0, v1 ; GFX900-SDAG-NEXT: s_setpc_b64 s[30:31] ; +; GFX900-GISEL-LABEL: v_log2_f32_nnan_ninf: +; GFX900-GISEL: ; %bb.0: +; GFX900-GISEL-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX900-GISEL-NEXT: v_mov_b32_e32 v1, 0x800000 +; GFX900-GISEL-NEXT: v_cmp_lt_f32_e32 vcc, v0, v1 +; GFX900-GISEL-NEXT: v_cndmask_b32_e64 v1, 0, 1, vcc +; GFX900-GISEL-NEXT: v_lshlrev_b32_e32 v1, 5, v1 +; GFX900-GISEL-NEXT: v_ldexp_f32 v0, v0, v1 +; GFX900-GISEL-NEXT: v_log_f32_e32 v0, v0 +; GFX900-GISEL-NEXT: v_mov_b32_e32 v1, 0x42000000 +; GFX900-GISEL-NEXT: v_cndmask_b32_e32 v1, 0, v1, vcc +; GFX900-GISEL-NEXT: v_sub_f32_e32 v0, v0, v1 +; GFX900-GISEL-NEXT: s_setpc_b64 s[30:31] +; ; GFX1100-SDAG-LABEL: v_log2_f32_nnan_ninf: ; GFX1100-SDAG: ; %bb.0: ; GFX1100-SDAG-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) @@ -2789,10 +3271,12 @@ define float @v_log2_f32_nnan_ninf(float %in) { ; GFX1100-GISEL: ; %bb.0: ; GFX1100-GISEL-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) ; GFX1100-GISEL-NEXT: v_cmp_gt_f32_e32 vcc_lo, 0x800000, v0 -; GFX1100-GISEL-NEXT: v_cndmask_b32_e64 v1, 1.0, 0x4f800000, vcc_lo -; GFX1100-GISEL-NEXT: s_delay_alu instid0(VALU_DEP_1) | instskip(SKIP_1) | instid1(VALU_DEP_2) -; GFX1100-GISEL-NEXT: v_mul_f32_e32 v0, v0, v1 +; GFX1100-GISEL-NEXT: 
v_cndmask_b32_e64 v1, 0, 1, vcc_lo +; GFX1100-GISEL-NEXT: s_delay_alu instid0(VALU_DEP_1) | instskip(NEXT) | instid1(VALU_DEP_1) +; GFX1100-GISEL-NEXT: v_lshlrev_b32_e32 v1, 5, v1 +; GFX1100-GISEL-NEXT: v_ldexp_f32 v0, v0, v1 ; GFX1100-GISEL-NEXT: v_cndmask_b32_e64 v1, 0, 0x42000000, vcc_lo +; GFX1100-GISEL-NEXT: s_delay_alu instid0(VALU_DEP_2) ; GFX1100-GISEL-NEXT: v_log_f32_e32 v0, v0 ; GFX1100-GISEL-NEXT: s_waitcnt_depctr 0xfff ; GFX1100-GISEL-NEXT: v_sub_f32_e32 v0, v0, v1 @@ -2852,19 +3336,19 @@ define float @v_log2_f32_nnan_ninf_dynamic(float %in) #1 { ; SI-SDAG-NEXT: v_sub_f32_e32 v0, v0, v1 ; SI-SDAG-NEXT: s_setpc_b64 s[30:31] ; -; GFX689-GISEL-LABEL: v_log2_f32_nnan_ninf_dynamic: -; GFX689-GISEL: ; %bb.0: -; GFX689-GISEL-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX689-GISEL-NEXT: v_mov_b32_e32 v1, 0x800000 -; GFX689-GISEL-NEXT: v_mov_b32_e32 v2, 0x4f800000 -; GFX689-GISEL-NEXT: v_cmp_lt_f32_e32 vcc, v0, v1 -; GFX689-GISEL-NEXT: v_cndmask_b32_e32 v1, 1.0, v2, vcc -; GFX689-GISEL-NEXT: v_mul_f32_e32 v0, v0, v1 -; GFX689-GISEL-NEXT: v_log_f32_e32 v0, v0 -; GFX689-GISEL-NEXT: v_mov_b32_e32 v1, 0x42000000 -; GFX689-GISEL-NEXT: v_cndmask_b32_e32 v1, 0, v1, vcc -; GFX689-GISEL-NEXT: v_sub_f32_e32 v0, v0, v1 -; GFX689-GISEL-NEXT: s_setpc_b64 s[30:31] +; SI-GISEL-LABEL: v_log2_f32_nnan_ninf_dynamic: +; SI-GISEL: ; %bb.0: +; SI-GISEL-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; SI-GISEL-NEXT: v_mov_b32_e32 v1, 0x800000 +; SI-GISEL-NEXT: v_cmp_lt_f32_e32 vcc, v0, v1 +; SI-GISEL-NEXT: v_cndmask_b32_e64 v1, 0, 1, vcc +; SI-GISEL-NEXT: v_lshlrev_b32_e32 v1, 5, v1 +; SI-GISEL-NEXT: v_ldexp_f32_e32 v0, v0, v1 +; SI-GISEL-NEXT: v_log_f32_e32 v0, v0 +; SI-GISEL-NEXT: v_mov_b32_e32 v1, 0x42000000 +; SI-GISEL-NEXT: v_cndmask_b32_e32 v1, 0, v1, vcc +; SI-GISEL-NEXT: v_sub_f32_e32 v0, v0, v1 +; SI-GISEL-NEXT: s_setpc_b64 s[30:31] ; ; VI-SDAG-LABEL: v_log2_f32_nnan_ninf_dynamic: ; VI-SDAG: ; %bb.0: @@ -2880,6 +3364,20 @@ define float 
@v_log2_f32_nnan_ninf_dynamic(float %in) #1 { ; VI-SDAG-NEXT: v_sub_f32_e32 v0, v0, v1 ; VI-SDAG-NEXT: s_setpc_b64 s[30:31] ; +; VI-GISEL-LABEL: v_log2_f32_nnan_ninf_dynamic: +; VI-GISEL: ; %bb.0: +; VI-GISEL-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; VI-GISEL-NEXT: v_mov_b32_e32 v1, 0x800000 +; VI-GISEL-NEXT: v_cmp_lt_f32_e32 vcc, v0, v1 +; VI-GISEL-NEXT: v_cndmask_b32_e64 v1, 0, 1, vcc +; VI-GISEL-NEXT: v_lshlrev_b32_e32 v1, 5, v1 +; VI-GISEL-NEXT: v_ldexp_f32 v0, v0, v1 +; VI-GISEL-NEXT: v_log_f32_e32 v0, v0 +; VI-GISEL-NEXT: v_mov_b32_e32 v1, 0x42000000 +; VI-GISEL-NEXT: v_cndmask_b32_e32 v1, 0, v1, vcc +; VI-GISEL-NEXT: v_sub_f32_e32 v0, v0, v1 +; VI-GISEL-NEXT: s_setpc_b64 s[30:31] +; ; GFX900-SDAG-LABEL: v_log2_f32_nnan_ninf_dynamic: ; GFX900-SDAG: ; %bb.0: ; GFX900-SDAG-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) @@ -2894,6 +3392,20 @@ define float @v_log2_f32_nnan_ninf_dynamic(float %in) #1 { ; GFX900-SDAG-NEXT: v_sub_f32_e32 v0, v0, v1 ; GFX900-SDAG-NEXT: s_setpc_b64 s[30:31] ; +; GFX900-GISEL-LABEL: v_log2_f32_nnan_ninf_dynamic: +; GFX900-GISEL: ; %bb.0: +; GFX900-GISEL-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX900-GISEL-NEXT: v_mov_b32_e32 v1, 0x800000 +; GFX900-GISEL-NEXT: v_cmp_lt_f32_e32 vcc, v0, v1 +; GFX900-GISEL-NEXT: v_cndmask_b32_e64 v1, 0, 1, vcc +; GFX900-GISEL-NEXT: v_lshlrev_b32_e32 v1, 5, v1 +; GFX900-GISEL-NEXT: v_ldexp_f32 v0, v0, v1 +; GFX900-GISEL-NEXT: v_log_f32_e32 v0, v0 +; GFX900-GISEL-NEXT: v_mov_b32_e32 v1, 0x42000000 +; GFX900-GISEL-NEXT: v_cndmask_b32_e32 v1, 0, v1, vcc +; GFX900-GISEL-NEXT: v_sub_f32_e32 v0, v0, v1 +; GFX900-GISEL-NEXT: s_setpc_b64 s[30:31] +; ; GFX1100-SDAG-LABEL: v_log2_f32_nnan_ninf_dynamic: ; GFX1100-SDAG: ; %bb.0: ; GFX1100-SDAG-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) @@ -2913,10 +3425,12 @@ define float @v_log2_f32_nnan_ninf_dynamic(float %in) #1 { ; GFX1100-GISEL: ; %bb.0: ; GFX1100-GISEL-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) ; GFX1100-GISEL-NEXT: v_cmp_gt_f32_e32 vcc_lo, 
0x800000, v0 -; GFX1100-GISEL-NEXT: v_cndmask_b32_e64 v1, 1.0, 0x4f800000, vcc_lo -; GFX1100-GISEL-NEXT: s_delay_alu instid0(VALU_DEP_1) | instskip(SKIP_1) | instid1(VALU_DEP_2) -; GFX1100-GISEL-NEXT: v_mul_f32_e32 v0, v0, v1 +; GFX1100-GISEL-NEXT: v_cndmask_b32_e64 v1, 0, 1, vcc_lo +; GFX1100-GISEL-NEXT: s_delay_alu instid0(VALU_DEP_1) | instskip(NEXT) | instid1(VALU_DEP_1) +; GFX1100-GISEL-NEXT: v_lshlrev_b32_e32 v1, 5, v1 +; GFX1100-GISEL-NEXT: v_ldexp_f32 v0, v0, v1 ; GFX1100-GISEL-NEXT: v_cndmask_b32_e64 v1, 0, 0x42000000, vcc_lo +; GFX1100-GISEL-NEXT: s_delay_alu instid0(VALU_DEP_2) ; GFX1100-GISEL-NEXT: v_log_f32_e32 v0, v0 ; GFX1100-GISEL-NEXT: s_waitcnt_depctr 0xfff ; GFX1100-GISEL-NEXT: v_sub_f32_e32 v0, v0, v1 @@ -2976,19 +3490,19 @@ define float @v_log2_f32_dynamic_mode(float %in) #1 { ; SI-SDAG-NEXT: v_sub_f32_e32 v0, v0, v1 ; SI-SDAG-NEXT: s_setpc_b64 s[30:31] ; -; GFX689-GISEL-LABEL: v_log2_f32_dynamic_mode: -; GFX689-GISEL: ; %bb.0: -; GFX689-GISEL-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX689-GISEL-NEXT: v_mov_b32_e32 v1, 0x800000 -; GFX689-GISEL-NEXT: v_mov_b32_e32 v2, 0x4f800000 -; GFX689-GISEL-NEXT: v_cmp_lt_f32_e32 vcc, v0, v1 -; GFX689-GISEL-NEXT: v_cndmask_b32_e32 v1, 1.0, v2, vcc -; GFX689-GISEL-NEXT: v_mul_f32_e32 v0, v0, v1 -; GFX689-GISEL-NEXT: v_log_f32_e32 v0, v0 -; GFX689-GISEL-NEXT: v_mov_b32_e32 v1, 0x42000000 -; GFX689-GISEL-NEXT: v_cndmask_b32_e32 v1, 0, v1, vcc -; GFX689-GISEL-NEXT: v_sub_f32_e32 v0, v0, v1 -; GFX689-GISEL-NEXT: s_setpc_b64 s[30:31] +; SI-GISEL-LABEL: v_log2_f32_dynamic_mode: +; SI-GISEL: ; %bb.0: +; SI-GISEL-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; SI-GISEL-NEXT: v_mov_b32_e32 v1, 0x800000 +; SI-GISEL-NEXT: v_cmp_lt_f32_e32 vcc, v0, v1 +; SI-GISEL-NEXT: v_cndmask_b32_e64 v1, 0, 1, vcc +; SI-GISEL-NEXT: v_lshlrev_b32_e32 v1, 5, v1 +; SI-GISEL-NEXT: v_ldexp_f32_e32 v0, v0, v1 +; SI-GISEL-NEXT: v_log_f32_e32 v0, v0 +; SI-GISEL-NEXT: v_mov_b32_e32 v1, 0x42000000 +; SI-GISEL-NEXT: v_cndmask_b32_e32 
v1, 0, v1, vcc +; SI-GISEL-NEXT: v_sub_f32_e32 v0, v0, v1 +; SI-GISEL-NEXT: s_setpc_b64 s[30:31] ; ; VI-SDAG-LABEL: v_log2_f32_dynamic_mode: ; VI-SDAG: ; %bb.0: @@ -3004,6 +3518,20 @@ define float @v_log2_f32_dynamic_mode(float %in) #1 { ; VI-SDAG-NEXT: v_sub_f32_e32 v0, v0, v1 ; VI-SDAG-NEXT: s_setpc_b64 s[30:31] ; +; VI-GISEL-LABEL: v_log2_f32_dynamic_mode: +; VI-GISEL: ; %bb.0: +; VI-GISEL-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; VI-GISEL-NEXT: v_mov_b32_e32 v1, 0x800000 +; VI-GISEL-NEXT: v_cmp_lt_f32_e32 vcc, v0, v1 +; VI-GISEL-NEXT: v_cndmask_b32_e64 v1, 0, 1, vcc +; VI-GISEL-NEXT: v_lshlrev_b32_e32 v1, 5, v1 +; VI-GISEL-NEXT: v_ldexp_f32 v0, v0, v1 +; VI-GISEL-NEXT: v_log_f32_e32 v0, v0 +; VI-GISEL-NEXT: v_mov_b32_e32 v1, 0x42000000 +; VI-GISEL-NEXT: v_cndmask_b32_e32 v1, 0, v1, vcc +; VI-GISEL-NEXT: v_sub_f32_e32 v0, v0, v1 +; VI-GISEL-NEXT: s_setpc_b64 s[30:31] +; ; GFX900-SDAG-LABEL: v_log2_f32_dynamic_mode: ; GFX900-SDAG: ; %bb.0: ; GFX900-SDAG-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) @@ -3018,6 +3546,20 @@ define float @v_log2_f32_dynamic_mode(float %in) #1 { ; GFX900-SDAG-NEXT: v_sub_f32_e32 v0, v0, v1 ; GFX900-SDAG-NEXT: s_setpc_b64 s[30:31] ; +; GFX900-GISEL-LABEL: v_log2_f32_dynamic_mode: +; GFX900-GISEL: ; %bb.0: +; GFX900-GISEL-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX900-GISEL-NEXT: v_mov_b32_e32 v1, 0x800000 +; GFX900-GISEL-NEXT: v_cmp_lt_f32_e32 vcc, v0, v1 +; GFX900-GISEL-NEXT: v_cndmask_b32_e64 v1, 0, 1, vcc +; GFX900-GISEL-NEXT: v_lshlrev_b32_e32 v1, 5, v1 +; GFX900-GISEL-NEXT: v_ldexp_f32 v0, v0, v1 +; GFX900-GISEL-NEXT: v_log_f32_e32 v0, v0 +; GFX900-GISEL-NEXT: v_mov_b32_e32 v1, 0x42000000 +; GFX900-GISEL-NEXT: v_cndmask_b32_e32 v1, 0, v1, vcc +; GFX900-GISEL-NEXT: v_sub_f32_e32 v0, v0, v1 +; GFX900-GISEL-NEXT: s_setpc_b64 s[30:31] +; ; GFX1100-SDAG-LABEL: v_log2_f32_dynamic_mode: ; GFX1100-SDAG: ; %bb.0: ; GFX1100-SDAG-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) @@ -3037,10 +3579,12 @@ define float 
@v_log2_f32_dynamic_mode(float %in) #1 { ; GFX1100-GISEL: ; %bb.0: ; GFX1100-GISEL-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) ; GFX1100-GISEL-NEXT: v_cmp_gt_f32_e32 vcc_lo, 0x800000, v0 -; GFX1100-GISEL-NEXT: v_cndmask_b32_e64 v1, 1.0, 0x4f800000, vcc_lo -; GFX1100-GISEL-NEXT: s_delay_alu instid0(VALU_DEP_1) | instskip(SKIP_1) | instid1(VALU_DEP_2) -; GFX1100-GISEL-NEXT: v_mul_f32_e32 v0, v0, v1 +; GFX1100-GISEL-NEXT: v_cndmask_b32_e64 v1, 0, 1, vcc_lo +; GFX1100-GISEL-NEXT: s_delay_alu instid0(VALU_DEP_1) | instskip(NEXT) | instid1(VALU_DEP_1) +; GFX1100-GISEL-NEXT: v_lshlrev_b32_e32 v1, 5, v1 +; GFX1100-GISEL-NEXT: v_ldexp_f32 v0, v0, v1 ; GFX1100-GISEL-NEXT: v_cndmask_b32_e64 v1, 0, 0x42000000, vcc_lo +; GFX1100-GISEL-NEXT: s_delay_alu instid0(VALU_DEP_2) ; GFX1100-GISEL-NEXT: v_log_f32_e32 v0, v0 ; GFX1100-GISEL-NEXT: s_waitcnt_depctr 0xfff ; GFX1100-GISEL-NEXT: v_sub_f32_e32 v0, v0, v1 diff --git a/llvm/test/CodeGen/AMDGPU/pseudo-scalar-transcendental.ll b/llvm/test/CodeGen/AMDGPU/pseudo-scalar-transcendental.ll index ba428df273db5..a439f8df10a26 100644 --- a/llvm/test/CodeGen/AMDGPU/pseudo-scalar-transcendental.ll +++ b/llvm/test/CodeGen/AMDGPU/pseudo-scalar-transcendental.ll @@ -3,32 +3,17 @@ ; RUN: llc -global-isel=1 -mtriple=amdgcn -mcpu=gfx1200 < %s | FileCheck -check-prefixes=GFX12,GFX12-GISEL %s define amdgpu_cs float @v_s_exp_f32(float inreg %src) { -; GFX12-SDAG-LABEL: v_s_exp_f32: -; GFX12-SDAG: ; %bb.0: -; GFX12-SDAG-NEXT: s_cmp_lt_f32 s0, 0xc2fc0000 -; GFX12-SDAG-NEXT: s_cselect_b32 s1, 0x42800000, 0 -; GFX12-SDAG-NEXT: s_delay_alu instid0(SALU_CYCLE_1) | instskip(SKIP_1) | instid1(SALU_CYCLE_2) -; GFX12-SDAG-NEXT: s_add_f32 s0, s0, s1 -; GFX12-SDAG-NEXT: s_cselect_b32 s1, 0xffffffc0, 0 -; GFX12-SDAG-NEXT: v_s_exp_f32 s0, s0 -; GFX12-SDAG-NEXT: s_delay_alu instid0(TRANS32_DEP_1) -; GFX12-SDAG-NEXT: v_ldexp_f32 v0, s0, s1 -; GFX12-SDAG-NEXT: ; return to shader part epilog -; -; GFX12-GISEL-LABEL: v_s_exp_f32: -; GFX12-GISEL: ; %bb.0: -; 
GFX12-GISEL-NEXT: s_cmp_lt_f32 s0, 0xc2fc0000 -; GFX12-GISEL-NEXT: s_cselect_b32 s1, 0x42800000, 0 -; GFX12-GISEL-NEXT: s_delay_alu instid0(SALU_CYCLE_1) | instskip(SKIP_1) | instid1(SALU_CYCLE_2) -; GFX12-GISEL-NEXT: s_add_f32 s0, s0, s1 -; GFX12-GISEL-NEXT: s_cselect_b32 s1, 0x1f800000, 1.0 -; GFX12-GISEL-NEXT: v_s_exp_f32 s0, s0 -; GFX12-GISEL-NEXT: s_wait_alu 0xfffe -; GFX12-GISEL-NEXT: s_delay_alu instid0(TRANS32_DEP_1) | instskip(SKIP_1) | instid1(SALU_CYCLE_2) -; GFX12-GISEL-NEXT: s_mul_f32 s0, s0, s1 -; GFX12-GISEL-NEXT: s_wait_alu 0xfffe -; GFX12-GISEL-NEXT: v_mov_b32_e32 v0, s0 -; GFX12-GISEL-NEXT: ; return to shader part epilog +; GFX12-LABEL: v_s_exp_f32: +; GFX12: ; %bb.0: +; GFX12-NEXT: s_cmp_lt_f32 s0, 0xc2fc0000 +; GFX12-NEXT: s_cselect_b32 s1, 0x42800000, 0 +; GFX12-NEXT: s_delay_alu instid0(SALU_CYCLE_1) | instskip(SKIP_1) | instid1(SALU_CYCLE_2) +; GFX12-NEXT: s_add_f32 s0, s0, s1 +; GFX12-NEXT: s_cselect_b32 s1, 0xffffffc0, 0 +; GFX12-NEXT: v_s_exp_f32 s0, s0 +; GFX12-NEXT: s_delay_alu instid0(TRANS32_DEP_1) +; GFX12-NEXT: v_ldexp_f32 v0, s0, s1 +; GFX12-NEXT: ; return to shader part epilog %result = call float @llvm.exp2.f32(float %src) ret float %result } @@ -88,16 +73,16 @@ define amdgpu_cs float @v_s_log_f32(float inreg %src) { ; GFX12-GISEL-LABEL: v_s_log_f32: ; GFX12-GISEL: ; %bb.0: ; GFX12-GISEL-NEXT: s_cmp_lt_f32 s0, 0x800000 -; GFX12-GISEL-NEXT: s_cselect_b32 s1, 0x4f800000, 1.0 -; GFX12-GISEL-NEXT: s_delay_alu instid0(SALU_CYCLE_1) | instskip(SKIP_1) | instid1(SALU_CYCLE_2) -; GFX12-GISEL-NEXT: s_mul_f32 s0, s0, s1 -; GFX12-GISEL-NEXT: s_cselect_b32 s1, 0x42000000, 0 -; GFX12-GISEL-NEXT: v_s_log_f32 s0, s0 -; GFX12-GISEL-NEXT: s_wait_alu 0xfffe -; GFX12-GISEL-NEXT: s_delay_alu instid0(TRANS32_DEP_1) | instskip(SKIP_1) | instid1(SALU_CYCLE_2) -; GFX12-GISEL-NEXT: s_sub_f32 s0, s0, s1 +; GFX12-GISEL-NEXT: s_cselect_b32 s1, 1, 0 +; GFX12-GISEL-NEXT: s_delay_alu instid0(SALU_CYCLE_1) | instskip(SKIP_3) | instid1(VALU_DEP_1) +; 
GFX12-GISEL-NEXT: s_lshl_b32 s2, s1, 5 +; GFX12-GISEL-NEXT: s_cmp_lg_u32 s1, 0 +; GFX12-GISEL-NEXT: v_ldexp_f32 v0, s0, s2 +; GFX12-GISEL-NEXT: s_cselect_b32 s0, 0x42000000, 0 +; GFX12-GISEL-NEXT: v_log_f32_e32 v0, v0 ; GFX12-GISEL-NEXT: s_wait_alu 0xfffe -; GFX12-GISEL-NEXT: v_mov_b32_e32 v0, s0 +; GFX12-GISEL-NEXT: s_delay_alu instid0(TRANS32_DEP_1) +; GFX12-GISEL-NEXT: v_subrev_f32_e32 v0, s0, v0 ; GFX12-GISEL-NEXT: ; return to shader part epilog %result = call float @llvm.log2.f32(float %src) ret float %result @@ -322,19 +307,18 @@ define amdgpu_cs float @srcmods_abs_f32(float inreg %src) { ; ; GFX12-GISEL-LABEL: srcmods_abs_f32: ; GFX12-GISEL: ; %bb.0: -; GFX12-GISEL-NEXT: s_bitset0_b32 s0, 31 +; GFX12-GISEL-NEXT: s_and_b32 s1, s0, 0x7fffffff ; GFX12-GISEL-NEXT: s_delay_alu instid0(SALU_CYCLE_1) | instskip(SKIP_1) | instid1(SALU_CYCLE_1) -; GFX12-GISEL-NEXT: s_cmp_lt_f32 s0, 0x800000 -; GFX12-GISEL-NEXT: s_cselect_b32 s1, 0x4f800000, 1.0 -; GFX12-GISEL-NEXT: s_mul_f32 s0, s0, s1 -; GFX12-GISEL-NEXT: s_cselect_b32 s1, 0x42000000, 0 -; GFX12-GISEL-NEXT: s_delay_alu instid0(SALU_CYCLE_2) | instskip(SKIP_1) | instid1(TRANS32_DEP_1) -; GFX12-GISEL-NEXT: v_s_log_f32 s0, s0 -; GFX12-GISEL-NEXT: s_wait_alu 0xfffe -; GFX12-GISEL-NEXT: s_sub_f32 s0, s0, s1 +; GFX12-GISEL-NEXT: s_cmp_lt_f32 s1, 0x800000 +; GFX12-GISEL-NEXT: s_cselect_b32 s1, 1, 0 +; GFX12-GISEL-NEXT: s_lshl_b32 s2, s1, 5 +; GFX12-GISEL-NEXT: s_cmp_lg_u32 s1, 0 +; GFX12-GISEL-NEXT: v_ldexp_f32 v0, |s0|, s2 +; GFX12-GISEL-NEXT: s_cselect_b32 s0, 0x42000000, 0 +; GFX12-GISEL-NEXT: s_delay_alu instid0(VALU_DEP_1) | instskip(SKIP_1) | instid1(TRANS32_DEP_1) +; GFX12-GISEL-NEXT: v_log_f32_e32 v0, v0 ; GFX12-GISEL-NEXT: s_wait_alu 0xfffe -; GFX12-GISEL-NEXT: s_delay_alu instid0(SALU_CYCLE_2) -; GFX12-GISEL-NEXT: v_mov_b32_e32 v0, s0 +; GFX12-GISEL-NEXT: v_subrev_f32_e32 v0, s0, v0 ; GFX12-GISEL-NEXT: ; return to shader part epilog %abs = call float @llvm.fabs.f32(float %src) %result = call float 
@llvm.log2.f32(float %abs) @@ -362,19 +346,18 @@ define amdgpu_cs float @srcmods_neg_f32(float inreg %src) { ; ; GFX12-GISEL-LABEL: srcmods_neg_f32: ; GFX12-GISEL: ; %bb.0: -; GFX12-GISEL-NEXT: s_xor_b32 s0, s0, 0x80000000 +; GFX12-GISEL-NEXT: s_xor_b32 s1, s0, 0x80000000 ; GFX12-GISEL-NEXT: s_delay_alu instid0(SALU_CYCLE_1) | instskip(SKIP_1) | instid1(SALU_CYCLE_1) -; GFX12-GISEL-NEXT: s_cmp_lt_f32 s0, 0x800000 -; GFX12-GISEL-NEXT: s_cselect_b32 s1, 0x4f800000, 1.0 -; GFX12-GISEL-NEXT: s_mul_f32 s0, s0, s1 -; GFX12-GISEL-NEXT: s_cselect_b32 s1, 0x42000000, 0 -; GFX12-GISEL-NEXT: s_delay_alu instid0(SALU_CYCLE_2) | instskip(SKIP_1) | instid1(TRANS32_DEP_1) -; GFX12-GISEL-NEXT: v_s_log_f32 s0, s0 -; GFX12-GISEL-NEXT: s_wait_alu 0xfffe -; GFX12-GISEL-NEXT: s_sub_f32 s0, s0, s1 +; GFX12-GISEL-NEXT: s_cmp_lt_f32 s1, 0x800000 +; GFX12-GISEL-NEXT: s_cselect_b32 s1, 1, 0 +; GFX12-GISEL-NEXT: s_lshl_b32 s2, s1, 5 +; GFX12-GISEL-NEXT: s_cmp_lg_u32 s1, 0 +; GFX12-GISEL-NEXT: v_ldexp_f32 v0, -s0, s2 +; GFX12-GISEL-NEXT: s_cselect_b32 s0, 0x42000000, 0 +; GFX12-GISEL-NEXT: s_delay_alu instid0(VALU_DEP_1) | instskip(SKIP_1) | instid1(TRANS32_DEP_1) +; GFX12-GISEL-NEXT: v_log_f32_e32 v0, v0 ; GFX12-GISEL-NEXT: s_wait_alu 0xfffe -; GFX12-GISEL-NEXT: s_delay_alu instid0(SALU_CYCLE_2) -; GFX12-GISEL-NEXT: v_mov_b32_e32 v0, s0 +; GFX12-GISEL-NEXT: v_subrev_f32_e32 v0, s0, v0 ; GFX12-GISEL-NEXT: ; return to shader part epilog %neg = fneg float %src %result = call float @llvm.log2.f32(float %neg) From 966500ebbace897b35be4466517dffd319396b58 Mon Sep 17 00:00:00 2001 From: vikashgu Date: Tue, 24 Dec 2024 06:56:44 +0000 Subject: [PATCH 4/7] Addressed suggested reviewed changes. 
--- llvm/lib/Target/AMDGPU/AMDGPUCombinerHelper.cpp | 10 +++++----- 1 file changed, 5 insertions(+), 5 deletions(-) diff --git a/llvm/lib/Target/AMDGPU/AMDGPUCombinerHelper.cpp b/llvm/lib/Target/AMDGPU/AMDGPUCombinerHelper.cpp index 06da76d5049e5..e3a5e20f4e537 100644 --- a/llvm/lib/Target/AMDGPU/AMDGPUCombinerHelper.cpp +++ b/llvm/lib/Target/AMDGPU/AMDGPUCombinerHelper.cpp @@ -451,6 +451,7 @@ bool AMDGPUCombinerHelper::matchCombineFmulWithSelectToFldexp( std::function &MatchInfo) { assert(MI.getOpcode() == TargetOpcode::G_FMUL); assert(Sel.getOpcode() == TargetOpcode::G_SELECT); + assert(MI.getOperand(2).getReg() == Sel.getOperand(0).getReg()); Register Dst = MI.getOperand(0).getReg(); LLT DestTy = MRI.getType(Dst); @@ -501,14 +502,13 @@ bool AMDGPUCombinerHelper::matchCombineFmulWithSelectToFldexp( Builder.buildConstant(IntDestTy, SelectTrueVal), Builder.buildConstant(IntDestTy, SelectFalseVal)); + Register XReg = MI.getOperand(1).getReg(); if (SelectTrueCst->Value.isNegative()) { - auto NegX = Builder.buildFNeg( - DestTy, MI.getOperand(1).getReg(), - MRI.getVRegDef(MI.getOperand(1).getReg())->getFlags()); + auto NegX = + Builder.buildFNeg(DestTy, XReg, MRI.getVRegDef(XReg)->getFlags()); Builder.buildFLdexp(Dst, NegX, NewSel, MI.getFlags()); } else { - Builder.buildFLdexp(Dst, MI.getOperand(1).getReg(), NewSel, - MI.getFlags()); + Builder.buildFLdexp(Dst, XReg, NewSel, MI.getFlags()); } }; From a23895556c27119f42a26cc3ae1102b1cbfa9058 Mon Sep 17 00:00:00 2001 From: vikashgu Date: Tue, 24 Dec 2024 08:10:02 +0000 Subject: [PATCH 5/7] Added constructor with TII in AMDGPUCombinerHelper to propagate it from Pre/Post legalizeCombiner. 
--- llvm/lib/Target/AMDGPU/AMDGPUCombinerHelper.cpp | 13 +++++++++---- llvm/lib/Target/AMDGPU/AMDGPUCombinerHelper.h | 9 +++++++++ .../Target/AMDGPU/AMDGPUPostLegalizerCombiner.cpp | 2 +- .../Target/AMDGPU/AMDGPUPreLegalizerCombiner.cpp | 2 +- 4 files changed, 20 insertions(+), 6 deletions(-) diff --git a/llvm/lib/Target/AMDGPU/AMDGPUCombinerHelper.cpp b/llvm/lib/Target/AMDGPU/AMDGPUCombinerHelper.cpp index e3a5e20f4e537..3be39e72ccffb 100644 --- a/llvm/lib/Target/AMDGPU/AMDGPUCombinerHelper.cpp +++ b/llvm/lib/Target/AMDGPU/AMDGPUCombinerHelper.cpp @@ -17,6 +17,13 @@ using namespace llvm; using namespace MIPatternMatch; +AMDGPUCombinerHelper::AMDGPUCombinerHelper( + GISelChangeObserver &Observer, MachineIRBuilder &B, bool IsPreLegalize, + GISelKnownBits *KB, MachineDominatorTree *MDT, const LegalizerInfo *LI, + const GCNSubtarget &STI) + : CombinerHelper(Observer, B, IsPreLegalize, KB, MDT, LI), STI(STI), + TII(*STI.getInstrInfo()) {} + LLVM_READNONE static bool fnegFoldsIntoMI(const MachineInstr &MI) { switch (MI.getOpcode()) { @@ -481,11 +488,9 @@ bool AMDGPUCombinerHelper::matchCombineFmulWithSelectToFldexp( return false; // For f32, only non-inline constants should be transformed. 
- const SIInstrInfo *TII = - (MI.getMF()->getSubtarget()).getInstrInfo(); if (ScalarDestTy == LLT::float32() && - TII->isInlineConstant(SelectTrueCst->Value) && - TII->isInlineConstant(SelectFalseCst->Value)) + TII.isInlineConstant(SelectTrueCst->Value) && + TII.isInlineConstant(SelectFalseCst->Value)) return false; int SelectTrueVal = SelectTrueCst->Value.getExactLog2Abs(); diff --git a/llvm/lib/Target/AMDGPU/AMDGPUCombinerHelper.h b/llvm/lib/Target/AMDGPU/AMDGPUCombinerHelper.h index 9a0a4205ed54b..893b3f5415f8c 100644 --- a/llvm/lib/Target/AMDGPU/AMDGPUCombinerHelper.h +++ b/llvm/lib/Target/AMDGPU/AMDGPUCombinerHelper.h @@ -15,13 +15,22 @@ #ifndef LLVM_LIB_TARGET_AMDGPU_AMDGPUCOMBINERHELPER_H #define LLVM_LIB_TARGET_AMDGPU_AMDGPUCOMBINERHELPER_H +#include "GCNSubtarget.h" #include "llvm/CodeGen/GlobalISel/Combiner.h" #include "llvm/CodeGen/GlobalISel/CombinerHelper.h" namespace llvm { class AMDGPUCombinerHelper : public CombinerHelper { +protected: + const GCNSubtarget &STI; + const SIInstrInfo &TII; + public: using CombinerHelper::CombinerHelper; + AMDGPUCombinerHelper(GISelChangeObserver &Observer, MachineIRBuilder &B, + bool IsPreLegalize, GISelKnownBits *KB, + MachineDominatorTree *MDT, const LegalizerInfo *LI, + const GCNSubtarget &STI); bool matchFoldableFneg(MachineInstr &MI, MachineInstr *&MatchInfo); void applyFoldableFneg(MachineInstr &MI, MachineInstr *&MatchInfo); diff --git a/llvm/lib/Target/AMDGPU/AMDGPUPostLegalizerCombiner.cpp b/llvm/lib/Target/AMDGPU/AMDGPUPostLegalizerCombiner.cpp index 54d927c33fc55..888817e52e35d 100644 --- a/llvm/lib/Target/AMDGPU/AMDGPUPostLegalizerCombiner.cpp +++ b/llvm/lib/Target/AMDGPU/AMDGPUPostLegalizerCombiner.cpp @@ -134,7 +134,7 @@ AMDGPUPostLegalizerCombinerImpl::AMDGPUPostLegalizerCombinerImpl( const GCNSubtarget &STI, MachineDominatorTree *MDT, const LegalizerInfo *LI) : Combiner(MF, CInfo, TPC, &KB, CSEInfo), RuleConfig(RuleConfig), STI(STI), TII(*STI.getInstrInfo()), - Helper(Observer, B, /*IsPreLegalize*/ 
false, &KB, MDT, LI, STI), #define GET_GICOMBINER_CONSTRUCTOR_INITS #include "AMDGPUGenPostLegalizeGICombiner.inc" #undef GET_GICOMBINER_CONSTRUCTOR_INITS diff --git a/llvm/lib/Target/AMDGPU/AMDGPUPreLegalizerCombiner.cpp b/llvm/lib/Target/AMDGPU/AMDGPUPreLegalizerCombiner.cpp index ff8189ce31f7f..e1564d5de415d 100644 --- a/llvm/lib/Target/AMDGPU/AMDGPUPreLegalizerCombiner.cpp +++ b/llvm/lib/Target/AMDGPU/AMDGPUPreLegalizerCombiner.cpp @@ -94,7 +94,7 @@ AMDGPUPreLegalizerCombinerImpl::AMDGPUPreLegalizerCombinerImpl( const AMDGPUPreLegalizerCombinerImplRuleConfig &RuleConfig, const GCNSubtarget &STI, MachineDominatorTree *MDT, const LegalizerInfo *LI) : Combiner(MF, CInfo, TPC, &KB, CSEInfo), RuleConfig(RuleConfig), STI(STI), - Helper(Observer, B, /*IsPreLegalize*/ true, &KB, MDT, LI), + Helper(Observer, B, /*IsPreLegalize*/ true, &KB, MDT, LI, STI), #define GET_GICOMBINER_CONSTRUCTOR_INITS #include "AMDGPUGenPreLegalizeGICombiner.inc" #undef GET_GICOMBINER_CONSTRUCTOR_INITS From 398dcf35a1c3ead8152c5532731364852f678f6a Mon Sep 17 00:00:00 2001 From: vikashgu Date: Fri, 27 Dec 2024 08:36:49 +0000 Subject: [PATCH 6/7] Utilized isConstantOrConstantSplatVectorFP API in matchCombine function.
--- .../Target/AMDGPU/AMDGPUCombinerHelper.cpp | 48 +++++++++---------- 1 file changed, 22 insertions(+), 26 deletions(-) diff --git a/llvm/lib/Target/AMDGPU/AMDGPUCombinerHelper.cpp b/llvm/lib/Target/AMDGPU/AMDGPUCombinerHelper.cpp index 3be39e72ccffb..8ca3c1747d20a 100644 --- a/llvm/lib/Target/AMDGPU/AMDGPUCombinerHelper.cpp +++ b/llvm/lib/Target/AMDGPU/AMDGPUCombinerHelper.cpp @@ -467,48 +467,44 @@ bool AMDGPUCombinerHelper::matchCombineFmulWithSelectToFldexp( if ((ScalarDestTy == LLT::float64() || ScalarDestTy == LLT::float32() || ScalarDestTy == LLT::float16()) && (MRI.hasOneNonDBGUse(Sel.getOperand(0).getReg()))) { - Register SelectCond = Sel.getOperand(1).getReg(); - Register SelectTrue = Sel.getOperand(2).getReg(); - Register SelectFalse = Sel.getOperand(3).getReg(); - - const auto SelectTrueCst = - DestTy.isVector() - ? getFConstantSplat(SelectTrue, MRI, /*allowUndef=*/false) - : getFConstantVRegValWithLookThrough(SelectTrue, MRI); - if (!SelectTrueCst) + Register SelectCondReg = Sel.getOperand(1).getReg(); + MachineInstr *SelectTrue = MRI.getVRegDef(Sel.getOperand(2).getReg()); + MachineInstr *SelectFalse = MRI.getVRegDef(Sel.getOperand(3).getReg()); + + const auto SelectTrueVal = + isConstantOrConstantSplatVectorFP(*SelectTrue, MRI); + if (!SelectTrueVal) return false; - const auto SelectFalseCst = - DestTy.isVector() - ? getFConstantSplat(SelectFalse, MRI, /*allowUndef=*/false) - : getFConstantVRegValWithLookThrough(SelectFalse, MRI); - if (!SelectFalseCst) + const auto SelectFalseVal = + isConstantOrConstantSplatVectorFP(*SelectFalse, MRI); + if (!SelectFalseVal) return false; - if (SelectTrueCst->Value.isNegative() != SelectFalseCst->Value.isNegative()) + if (SelectTrueVal->isNegative() != SelectFalseVal->isNegative()) return false; // For f32, only non-inline constants should be transformed. 
if (ScalarDestTy == LLT::float32() && - TII.isInlineConstant(SelectTrueCst->Value) && - TII.isInlineConstant(SelectFalseCst->Value)) + TII.isInlineConstant(*SelectTrueVal) && + TII.isInlineConstant(*SelectFalseVal)) return false; - int SelectTrueVal = SelectTrueCst->Value.getExactLog2Abs(); - if (SelectTrueVal == INT_MIN) + int SelectTrueLog2Val = SelectTrueVal->getExactLog2Abs(); + if (SelectTrueLog2Val == INT_MIN) return false; - int SelectFalseVal = SelectFalseCst->Value.getExactLog2Abs(); - if (SelectFalseVal == INT_MIN) + int SelectFalseLog2Val = SelectFalseVal->getExactLog2Abs(); + if (SelectFalseLog2Val == INT_MIN) return false; MatchInfo = [=, &MI](MachineIRBuilder &Builder) { LLT IntDestTy = DestTy.changeElementType(LLT::scalar(32)); - auto NewSel = - Builder.buildSelect(IntDestTy, SelectCond, - Builder.buildConstant(IntDestTy, SelectTrueVal), - Builder.buildConstant(IntDestTy, SelectFalseVal)); + auto NewSel = Builder.buildSelect( + IntDestTy, SelectCondReg, + Builder.buildConstant(IntDestTy, SelectTrueLog2Val), + Builder.buildConstant(IntDestTy, SelectFalseLog2Val)); Register XReg = MI.getOperand(1).getReg(); - if (SelectTrueCst->Value.isNegative()) { + if (SelectTrueVal->isNegative()) { auto NegX = Builder.buildFNeg(DestTy, XReg, MRI.getVRegDef(XReg)->getFlags()); Builder.buildFLdexp(Dst, NegX, NewSel, MI.getFlags()); From d024dca4d7db95e6a1a44c8a0ae604bdfe25f13b Mon Sep 17 00:00:00 2001 From: vikashgu Date: Fri, 3 Jan 2025 07:29:03 +0000 Subject: [PATCH 7/7] Addressed a suggested change regarding early false return. 
--- .../Target/AMDGPU/AMDGPUCombinerHelper.cpp | 92 +++++++++---------- 1 file changed, 45 insertions(+), 47 deletions(-) diff --git a/llvm/lib/Target/AMDGPU/AMDGPUCombinerHelper.cpp b/llvm/lib/Target/AMDGPU/AMDGPUCombinerHelper.cpp index 8ca3c1747d20a..f6f9f4bc0fb1b 100644 --- a/llvm/lib/Target/AMDGPU/AMDGPUCombinerHelper.cpp +++ b/llvm/lib/Target/AMDGPU/AMDGPUCombinerHelper.cpp @@ -464,57 +464,55 @@ bool AMDGPUCombinerHelper::matchCombineFmulWithSelectToFldexp( LLT DestTy = MRI.getType(Dst); LLT ScalarDestTy = DestTy.getScalarType(); - if ((ScalarDestTy == LLT::float64() || ScalarDestTy == LLT::float32() || - ScalarDestTy == LLT::float16()) && - (MRI.hasOneNonDBGUse(Sel.getOperand(0).getReg()))) { - Register SelectCondReg = Sel.getOperand(1).getReg(); - MachineInstr *SelectTrue = MRI.getVRegDef(Sel.getOperand(2).getReg()); - MachineInstr *SelectFalse = MRI.getVRegDef(Sel.getOperand(3).getReg()); - - const auto SelectTrueVal = - isConstantOrConstantSplatVectorFP(*SelectTrue, MRI); - if (!SelectTrueVal) - return false; - const auto SelectFalseVal = - isConstantOrConstantSplatVectorFP(*SelectFalse, MRI); - if (!SelectFalseVal) - return false; + if ((ScalarDestTy != LLT::float64() && ScalarDestTy != LLT::float32() && + ScalarDestTy != LLT::float16()) || + !MRI.hasOneNonDBGUse(Sel.getOperand(0).getReg())) + return false; - if (SelectTrueVal->isNegative() != SelectFalseVal->isNegative()) - return false; + Register SelectCondReg = Sel.getOperand(1).getReg(); + MachineInstr *SelectTrue = MRI.getVRegDef(Sel.getOperand(2).getReg()); + MachineInstr *SelectFalse = MRI.getVRegDef(Sel.getOperand(3).getReg()); - // For f32, only non-inline constants should be transformed. 
- if (ScalarDestTy == LLT::float32() && - TII.isInlineConstant(*SelectTrueVal) && - TII.isInlineConstant(*SelectFalseVal)) - return false; + const auto SelectTrueVal = + isConstantOrConstantSplatVectorFP(*SelectTrue, MRI); + if (!SelectTrueVal) + return false; + const auto SelectFalseVal = + isConstantOrConstantSplatVectorFP(*SelectFalse, MRI); + if (!SelectFalseVal) + return false; - int SelectTrueLog2Val = SelectTrueVal->getExactLog2Abs(); - if (SelectTrueLog2Val == INT_MIN) - return false; - int SelectFalseLog2Val = SelectFalseVal->getExactLog2Abs(); - if (SelectFalseLog2Val == INT_MIN) - return false; + if (SelectTrueVal->isNegative() != SelectFalseVal->isNegative()) + return false; - MatchInfo = [=, &MI](MachineIRBuilder &Builder) { - LLT IntDestTy = DestTy.changeElementType(LLT::scalar(32)); - auto NewSel = Builder.buildSelect( - IntDestTy, SelectCondReg, - Builder.buildConstant(IntDestTy, SelectTrueLog2Val), - Builder.buildConstant(IntDestTy, SelectFalseLog2Val)); - - Register XReg = MI.getOperand(1).getReg(); - if (SelectTrueVal->isNegative()) { - auto NegX = - Builder.buildFNeg(DestTy, XReg, MRI.getVRegDef(XReg)->getFlags()); - Builder.buildFLdexp(Dst, NegX, NewSel, MI.getFlags()); - } else { - Builder.buildFLdexp(Dst, XReg, NewSel, MI.getFlags()); - } - }; + // For f32, only non-inline constants should be transformed. 
+ if (ScalarDestTy == LLT::float32() && TII.isInlineConstant(*SelectTrueVal) && + TII.isInlineConstant(*SelectFalseVal)) + return false; - return true; - } + int SelectTrueLog2Val = SelectTrueVal->getExactLog2Abs(); + if (SelectTrueLog2Val == INT_MIN) + return false; + int SelectFalseLog2Val = SelectFalseVal->getExactLog2Abs(); + if (SelectFalseLog2Val == INT_MIN) + return false; - return false; + MatchInfo = [=, &MI](MachineIRBuilder &Builder) { + LLT IntDestTy = DestTy.changeElementType(LLT::scalar(32)); + auto NewSel = Builder.buildSelect( + IntDestTy, SelectCondReg, + Builder.buildConstant(IntDestTy, SelectTrueLog2Val), + Builder.buildConstant(IntDestTy, SelectFalseLog2Val)); + + Register XReg = MI.getOperand(1).getReg(); + if (SelectTrueVal->isNegative()) { + auto NegX = + Builder.buildFNeg(DestTy, XReg, MRI.getVRegDef(XReg)->getFlags()); + Builder.buildFLdexp(Dst, NegX, NewSel, MI.getFlags()); + } else { + Builder.buildFLdexp(Dst, XReg, NewSel, MI.getFlags()); + } + }; + + return true; }