From 51b12c3be2bad4925742b0bd9833b5a77c335a62 Mon Sep 17 00:00:00 2001
From: Ana Mihajlovic
Date: Wed, 9 Apr 2025 15:24:55 +0200
Subject: [PATCH 1/4] precommit

---
 .../CodeGen/AMDGPU/short-select-cndmask.ll | 51 +++++++++++++++++++
 1 file changed, 51 insertions(+)
 create mode 100644 llvm/test/CodeGen/AMDGPU/short-select-cndmask.ll

diff --git a/llvm/test/CodeGen/AMDGPU/short-select-cndmask.ll b/llvm/test/CodeGen/AMDGPU/short-select-cndmask.ll
new file mode 100644
index 0000000000000..2d6810a34afb2
--- /dev/null
+++ b/llvm/test/CodeGen/AMDGPU/short-select-cndmask.ll
@@ -0,0 +1,51 @@
+; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py UTC_ARGS: --version 5
+; RUN: llc -mtriple=amdgcn -mcpu=gfx1200 -verify-machineinstrs < %s | FileCheck %s -check-prefix=GCN
+; RUN: llc -mtriple=amdgcn -mcpu=gfx1100 -verify-machineinstrs < %s | FileCheck %s -check-prefix=GCN
+
+define amdgpu_cs void @test(i32 %a, i32 %x, i32 %y, i32 %p, i32 %q, i32 %r, i32 %s, ptr addrspace(1) %out) {
+; GCN-LABEL: test:
+; GCN: ; %bb.0: ; %.entry
+; GCN-NEXT: v_cmp_eq_u32_e32 vcc_lo, -1, v0
+; GCN-NEXT: v_cndmask_b32_e32 v0, v2, v1, vcc_lo
+; GCN-NEXT: v_cndmask_b32_e64 v1, v3, 0, vcc_lo
+; GCN-NEXT: v_cndmask_b32_e64 v2, v4, 0, vcc_lo
+; GCN-NEXT: v_cndmask_b32_e32 v3, v6, v5, vcc_lo
+; GCN-NEXT: global_store_b128 v[7:8], v[0:3], off
+; GCN-NEXT: s_endpgm
+.entry:
+  %vcc = icmp eq i32 %a, -1
+  %val1 = select i1 %vcc, i32 %x, i32 %y
+  %val2 = select i1 %vcc, i32 0, i32 %p
+  %val3 = select i1 %vcc, i32 0, i32 %q
+  %val4 = select i1 %vcc, i32 %r, i32 %s
+  %ret0 = insertelement <4 x i32> poison, i32 %val1, i32 0
+  %ret1 = insertelement <4 x i32> %ret0, i32 %val2, i32 1
+  %ret2 = insertelement <4 x i32> %ret1, i32 %val3, i32 2
+  %ret3 = insertelement <4 x i32> %ret2, i32 %val4, i32 3
+  store <4 x i32> %ret3, ptr addrspace(1) %out
+  ret void
+}
+
+define amdgpu_cs void @test_negative_case(i32 %a, i32 %x, i32 %y, i32 %p, i32 %q, i32 %r, i32 %s, ptr addrspace(1) %out) {
+; GCN-LABEL: test_negative_case:
+; GCN: ; %bb.0: ; %.entry
+; GCN-NEXT: v_cmp_eq_u32_e32 vcc_lo, -1, v0
+; GCN-NEXT: v_cndmask_b32_e32 v0, v2, v1, vcc_lo
+; GCN-NEXT: v_cndmask_b32_e64 v1, v3, 0, vcc_lo
+; GCN-NEXT: v_cndmask_b32_e64 v2, v4, 0, vcc_lo
+; GCN-NEXT: v_cndmask_b32_e32 v3, v6, v5, vcc_lo
+; GCN-NEXT: global_store_b128 v[7:8], v[0:3], off
+; GCN-NEXT: s_endpgm
+.entry:
+  %vcc = icmp eq i32 %a, -1
+  %val1 = select i1 %vcc, i32 %x, i32 %y
+  %val2 = select i1 %vcc, i32 0, i32 %p
+  %val3 = select i1 %vcc, i32 0, i32 %q
+  %val4 = select i1 %vcc, i32 %r, i32 %s
+  %ret0 = insertelement <4 x i32> poison, i32 %val1, i32 0
+  %ret1 = insertelement <4 x i32> %ret0, i32 %val2, i32 1
+  %ret2 = insertelement <4 x i32> %ret1, i32 %val3, i32 2
+  %ret3 = insertelement <4 x i32> %ret2, i32 %val4, i32 3
+  store <4 x i32> %ret3, ptr addrspace(1) %out
+  ret void
+}

From b2f1080ae2dbbd94fda0902feba9bac89a5a6385 Mon Sep 17 00:00:00 2001
From: Ana Mihajlovic
Date: Wed, 9 Apr 2025 15:37:22 +0200
Subject: [PATCH 2/4] [AMDGPU] Merge V_CNDMASKS into V_DUAL_CNDMASK

---
 llvm/lib/Target/AMDGPU/SIFoldOperands.cpp | 91 ++++++++++++++++++-
 .../test/CodeGen/AMDGPU/GlobalISel/uaddsat.ll | 18 ++--
 .../test/CodeGen/AMDGPU/GlobalISel/usubsat.ll | 18 ++--
 llvm/test/CodeGen/AMDGPU/div_i128.ll | 20 ++--
 llvm/test/CodeGen/AMDGPU/div_v2i128.ll | 80 ++++++++--------
 .../CodeGen/AMDGPU/extract_vector_dynelt.ll | 6 +-
 llvm/test/CodeGen/AMDGPU/fneg-combines.new.ll | 69 +++++++-------
 llvm/test/CodeGen/AMDGPU/fptoi.i128.ll | 36 ++++----
 .../CodeGen/AMDGPU/insert_vector_dynelt.ll | 47 +++++-----
 llvm/test/CodeGen/AMDGPU/itofp.i128.ll | 36 ++++----
 .../CodeGen/AMDGPU/short-select-cndmask.ll | 16 ++--
 11 files changed, 260 insertions(+), 177 deletions(-)

diff --git a/llvm/lib/Target/AMDGPU/SIFoldOperands.cpp b/llvm/lib/Target/AMDGPU/SIFoldOperands.cpp
index d6acf9e081b9f..4ad538e0b1e5f 100644
--- a/llvm/lib/Target/AMDGPU/SIFoldOperands.cpp
+++ b/llvm/lib/Target/AMDGPU/SIFoldOperands.cpp
@@ -105,6 +105,25 @@ class SIFoldOperandsImpl {
     }
   }
 
+  unsigned getInverseCompareOpcode(MachineInstr &MI) const {
+    switch (MI.getOpcode()) {
+    case AMDGPU::V_CMP_EQ_U32_e64:
+      return AMDGPU::V_CMP_NE_U32_e64;
+    case AMDGPU::V_CMP_NE_U32_e64:
+      return AMDGPU::V_CMP_EQ_U32_e64;
+    case AMDGPU::V_CMP_GE_U32_e64:
+      return AMDGPU::V_CMP_LT_U32_e64;
+    case AMDGPU::V_CMP_LE_U32_e64:
+      return AMDGPU::V_CMP_GT_U32_e64;
+    case AMDGPU::V_CMP_GT_U32_e64:
+      return AMDGPU::V_CMP_LE_U32_e64;
+    case AMDGPU::V_CMP_LT_U32_e64:
+      return AMDGPU::V_CMP_GE_U32_e64;
+    default:
+      return 0;
+    }
+  }
+
   bool foldCopyToVGPROfScalarAddOfFrameIndex(Register DstReg, Register SrcReg,
                                              MachineInstr &MI) const;
 
@@ -133,7 +152,8 @@ class SIFoldOperandsImpl {
 
   std::optional<int64_t> getImmOrMaterializedImm(MachineOperand &Op) const;
   bool tryConstantFoldOp(MachineInstr *MI) const;
-  bool tryFoldCndMask(MachineInstr &MI) const;
+  bool tryFoldCndMask(MachineInstr &MI, Register *RegVCC,
+                      Register *NewVCC) const;
   bool tryFoldZeroHighBits(MachineInstr &MI) const;
   bool foldInstOperand(MachineInstr &MI, MachineOperand &OpToFold) const;
 
@@ -152,6 +172,9 @@ class SIFoldOperandsImpl {
 
   bool tryOptimizeAGPRPhis(MachineBasicBlock &MBB);
 
+  bool shouldSwitchOperands(MachineRegisterInfo &MRI, MachineInstr &MI,
+                            const SIInstrInfo &TII) const;
+
 public:
   SIFoldOperandsImpl() = default;
 
@@ -1459,13 +1482,73 @@ bool SIFoldOperandsImpl::tryConstantFoldOp(MachineInstr *MI) const {
   return false;
 }
 
+bool SIFoldOperandsImpl::shouldSwitchOperands(MachineRegisterInfo &MRI,
+                                              MachineInstr &MI,
+                                              const SIInstrInfo &TII) const {
+  auto allUses = MRI.use_nodbg_operands(MI.getOperand(5).getReg());
+  unsigned count = 0;
+
+  for (auto &Use : allUses) {
+    if (Use.getParent()->getOpcode() != AMDGPU::V_CNDMASK_B32_e64)
+      return false;
+    MachineOperand *Src0 =
+        TII.getNamedOperand(*Use.getParent(), AMDGPU::OpName::src0);
+    MachineOperand *Src1 =
+        TII.getNamedOperand(*Use.getParent(), AMDGPU::OpName::src1);
+
+    auto src0Imm = getImmOrMaterializedImm(*Src0);
+    auto src1Imm = getImmOrMaterializedImm(*Src1);
+
+    if (!src1Imm && src0Imm)
+      return false;
+    if (src1Imm && !src0Imm)
+      count++;
+  }
+  return (count >= 2);
+}
+
 // Try to fold an instruction into a simpler one
-bool SIFoldOperandsImpl::tryFoldCndMask(MachineInstr &MI) const {
+bool SIFoldOperandsImpl::tryFoldCndMask(MachineInstr &MI, Register *RegVCC,
+                                        Register *NewVCC) const {
   unsigned Opc = MI.getOpcode();
   if (Opc != AMDGPU::V_CNDMASK_B32_e32 && Opc != AMDGPU::V_CNDMASK_B32_e64 &&
       Opc != AMDGPU::V_CNDMASK_B64_PSEUDO)
     return false;
 
+  if (Opc == AMDGPU::V_CNDMASK_B32_e64) {
+    const DebugLoc &DL = MI.getDebugLoc();
+    auto Reg = MI.getOperand(5).getReg();
+
+    if (*RegVCC != Reg) {
+      MachineInstr *DefMI = MRI->getVRegDef(Reg);
+      if (DefMI) {
+        unsigned Opcode = getInverseCompareOpcode(*DefMI);
+        if (Opcode &&
+            SIFoldOperandsImpl::shouldSwitchOperands(*MRI, MI, *TII)) {
+          auto cmpDL = DefMI->getDebugLoc();
+          *NewVCC = MRI->createVirtualRegister(MRI->getRegClass(Reg));
+          *RegVCC = Reg;
+          MachineInstrBuilder inverseCompare = BuildMI(
+              *DefMI->getParent(), DefMI, cmpDL, TII->get(Opcode), *NewVCC);
+
+          inverseCompare.add(DefMI->getOperand(1));
+          inverseCompare.add(DefMI->getOperand(2));
+        }
+      }
+    }
+    if (*RegVCC == Reg) {
+      BuildMI(*MI.getParent(), MI, DL, TII->get(AMDGPU::V_CNDMASK_B32_e64),
+              MI.getOperand(0).getReg())
+          .add(MI.getOperand(3))
+          .add(MI.getOperand(4))
+          .add(MI.getOperand(1))
+          .add(MI.getOperand(2))
+          .addReg(*NewVCC);
+      MI.eraseFromParent();
+      return true;
+    }
+  }
+
   MachineOperand *Src0 = TII->getNamedOperand(MI, AMDGPU::OpName::src0);
   MachineOperand *Src1 = TII->getNamedOperand(MI, AMDGPU::OpName::src1);
   if (!Src1->isIdenticalTo(*Src0)) {
@@ -2533,10 +2616,12 @@ bool SIFoldOperandsImpl::run(MachineFunction &MF) {
   bool HasNSZ = MFI->hasNoSignedZerosFPMath();
 
   bool Changed = false;
+  Register Reg = 0;
+  Register NewVCC = 0;
   for (MachineBasicBlock *MBB : depth_first(&MF)) {
     MachineOperand *CurrentKnownM0Val = nullptr;
     for (auto &MI : make_early_inc_range(*MBB)) {
-      Changed |= tryFoldCndMask(MI);
+      Changed |= tryFoldCndMask(MI, &Reg, &NewVCC);
 
       if (tryFoldZeroHighBits(MI)) {
         Changed = true;
diff --git a/llvm/test/CodeGen/AMDGPU/GlobalISel/uaddsat.ll b/llvm/test/CodeGen/AMDGPU/GlobalISel/uaddsat.ll
index d9158e3558395..536504747c971 100644
--- a/llvm/test/CodeGen/AMDGPU/GlobalISel/uaddsat.ll
+++ b/llvm/test/CodeGen/AMDGPU/GlobalISel/uaddsat.ll
@@ -2835,9 +2835,9 @@ define i48 @v_uaddsat_i48(i48 %lhs, i48 %rhs) {
 ; GFX6-NEXT: v_and_b32_e32 v0, 0xffff, v0
 ; GFX6-NEXT: v_lshlrev_b32_e32 v3, 16, v3
 ; GFX6-NEXT: v_or_b32_e32 v0, v0, v3
-; GFX6-NEXT: v_cmp_ne_u32_e32 vcc, v1, v2
-; GFX6-NEXT: v_cndmask_b32_e64 v0, v0, -1, vcc
-; GFX6-NEXT: v_cndmask_b32_e64 v1, v2, -1, vcc
+; GFX6-NEXT: v_cmp_eq_u32_e32 vcc, v1, v2
+; GFX6-NEXT: v_cndmask_b32_e32 v0, -1, v0, vcc
+; GFX6-NEXT: v_cndmask_b32_e32 v1, -1, v2, vcc
 ; GFX6-NEXT: s_setpc_b64 s[30:31]
 ;
 ; GFX8-LABEL: v_uaddsat_i48:
@@ -2944,10 +2944,10 @@ define amdgpu_ps <2 x float> @uaddsat_i48_sv(i48 inreg %lhs, i48 %rhs) {
 ; GFX6-NEXT: v_lshrrev_b32_e32 v3, 16, v0
 ; GFX6-NEXT: v_and_b32_e32 v0, 0xffff, v0
 ; GFX6-NEXT: v_lshlrev_b32_e32 v3, 16, v3
-; GFX6-NEXT: v_cmp_ne_u32_e32 vcc, v1, v2
+; GFX6-NEXT: v_cmp_eq_u32_e32 vcc, v1, v2
 ; GFX6-NEXT: v_or_b32_e32 v0, v0, v3
-; GFX6-NEXT: v_cndmask_b32_e64 v1, v2, -1, vcc
-; GFX6-NEXT: v_cndmask_b32_e64 v0, v0, -1, vcc
+; GFX6-NEXT: v_cndmask_b32_e32 v1, -1, v2, vcc
+; GFX6-NEXT: v_cndmask_b32_e32 v0, -1, v0, vcc
 ; GFX6-NEXT: v_and_b32_e32 v1, 0xffff, v1
 ; GFX6-NEXT: ; return to shader part epilog
 ;
@@ -3003,10 +3003,10 @@ define amdgpu_ps <2 x float> @uaddsat_i48_vs(i48 %lhs, i48 inreg %rhs) {
 ; GFX6-NEXT: v_lshrrev_b32_e32 v3, 16, v0
 ; GFX6-NEXT: v_and_b32_e32 v0, 0xffff, v0
 ; GFX6-NEXT: v_lshlrev_b32_e32 v3, 16, v3
-; GFX6-NEXT: v_cmp_ne_u32_e32 vcc, v1, v2
+; GFX6-NEXT: v_cmp_eq_u32_e32 vcc, v1, v2
 ; GFX6-NEXT: v_or_b32_e32 v0, v0, v3
-; GFX6-NEXT: v_cndmask_b32_e64 v1, v2, -1, vcc
-; GFX6-NEXT: v_cndmask_b32_e64 v0, v0, -1, vcc
+; GFX6-NEXT: v_cndmask_b32_e32 v1, -1, v2, vcc
+; GFX6-NEXT: v_cndmask_b32_e32 v0, -1, v0, vcc
 ; GFX6-NEXT: v_and_b32_e32 v1, 0xffff, v1
 ; GFX6-NEXT: ; return to shader part epilog
 ;
diff --git a/llvm/test/CodeGen/AMDGPU/GlobalISel/usubsat.ll b/llvm/test/CodeGen/AMDGPU/GlobalISel/usubsat.ll
index 1fd139b06417f..1944d1577ae29 100644
--- a/llvm/test/CodeGen/AMDGPU/GlobalISel/usubsat.ll
+++ b/llvm/test/CodeGen/AMDGPU/GlobalISel/usubsat.ll
@@ -2705,9 +2705,9 @@ define i48 @v_usubsat_i48(i48 %lhs, i48 %rhs) {
 ; GFX6-NEXT: v_and_b32_e32 v0, 0xffff, v0
 ; GFX6-NEXT: v_lshlrev_b32_e32 v3, 16, v3
 ; GFX6-NEXT: v_or_b32_e32 v0, v0, v3
-;
GFX6-NEXT: v_cmp_ne_u32_e32 vcc, v1, v2 -; GFX6-NEXT: v_cndmask_b32_e64 v0, v0, 0, vcc -; GFX6-NEXT: v_cndmask_b32_e64 v1, v2, 0, vcc +; GFX6-NEXT: v_cmp_eq_u32_e32 vcc, v1, v2 +; GFX6-NEXT: v_cndmask_b32_e32 v0, 0, v0, vcc +; GFX6-NEXT: v_cndmask_b32_e32 v1, 0, v2, vcc ; GFX6-NEXT: s_setpc_b64 s[30:31] ; ; GFX8-LABEL: v_usubsat_i48: @@ -2815,9 +2815,9 @@ define amdgpu_ps <2 x float> @usubsat_i48_sv(i48 inreg %lhs, i48 %rhs) { ; GFX6-NEXT: v_and_b32_e32 v0, 0xffff, v0 ; GFX6-NEXT: v_lshlrev_b32_e32 v3, 16, v3 ; GFX6-NEXT: v_or_b32_e32 v0, v0, v3 -; GFX6-NEXT: v_cmp_ne_u32_e32 vcc, v1, v2 -; GFX6-NEXT: v_cndmask_b32_e64 v0, v0, 0, vcc -; GFX6-NEXT: v_cndmask_b32_e64 v1, v2, 0, vcc +; GFX6-NEXT: v_cmp_eq_u32_e32 vcc, v1, v2 +; GFX6-NEXT: v_cndmask_b32_e32 v0, 0, v0, vcc +; GFX6-NEXT: v_cndmask_b32_e32 v1, 0, v2, vcc ; GFX6-NEXT: ; return to shader part epilog ; ; GFX8-LABEL: usubsat_i48_sv: @@ -2873,9 +2873,9 @@ define amdgpu_ps <2 x float> @usubsat_i48_vs(i48 %lhs, i48 inreg %rhs) { ; GFX6-NEXT: v_and_b32_e32 v0, 0xffff, v0 ; GFX6-NEXT: v_lshlrev_b32_e32 v3, 16, v3 ; GFX6-NEXT: v_or_b32_e32 v0, v0, v3 -; GFX6-NEXT: v_cmp_ne_u32_e32 vcc, v1, v2 -; GFX6-NEXT: v_cndmask_b32_e64 v0, v0, 0, vcc -; GFX6-NEXT: v_cndmask_b32_e64 v1, v2, 0, vcc +; GFX6-NEXT: v_cmp_eq_u32_e32 vcc, v1, v2 +; GFX6-NEXT: v_cndmask_b32_e32 v0, 0, v0, vcc +; GFX6-NEXT: v_cndmask_b32_e32 v1, 0, v2, vcc ; GFX6-NEXT: ; return to shader part epilog ; ; GFX8-LABEL: usubsat_i48_vs: diff --git a/llvm/test/CodeGen/AMDGPU/div_i128.ll b/llvm/test/CodeGen/AMDGPU/div_i128.ll index 06c0417211809..efd633d21dba1 100644 --- a/llvm/test/CodeGen/AMDGPU/div_i128.ll +++ b/llvm/test/CodeGen/AMDGPU/div_i128.ll @@ -1287,11 +1287,11 @@ define i128 @v_sdiv_i128_vv(i128 %lhs, i128 %rhs) { ; GFX9-G-NEXT: v_xor_b32_e32 v6, 0x7f, v0 ; GFX9-G-NEXT: v_or_b32_e32 v14, v6, v2 ; GFX9-G-NEXT: v_and_b32_e32 v6, 1, v20 -; GFX9-G-NEXT: v_cmp_ne_u32_e32 vcc, 0, v6 -; GFX9-G-NEXT: v_cndmask_b32_e64 v6, v8, 0, vcc -; GFX9-G-NEXT: v_cndmask_b32_e64 v7, v9, 0, vcc -; GFX9-G-NEXT: v_cndmask_b32_e64 v12, v10, 0, vcc -; GFX9-G-NEXT: v_cndmask_b32_e64 v13, v11, 0, vcc +; GFX9-G-NEXT: v_cmp_eq_u32_e32 vcc, 0, v6 +; GFX9-G-NEXT: v_cndmask_b32_e32 v6, 0, v8, vcc +; GFX9-G-NEXT: v_cndmask_b32_e32 v7, 0, v9, vcc +; GFX9-G-NEXT: v_cndmask_b32_e32 v12, 0, v10, vcc +; GFX9-G-NEXT: v_cndmask_b32_e32 v13, 0, v11, vcc ; GFX9-G-NEXT: v_cmp_eq_u64_e32 vcc, 0, v[14:15] ; GFX9-G-NEXT: v_cndmask_b32_e64 v14, 0, 1, vcc ; GFX9-G-NEXT: v_or_b32_e32 v14, v20, v14 @@ -3414,11 +3414,11 @@ define i128 @v_udiv_i128_vv(i128 %lhs, i128 %rhs) { ; GFX9-G-NEXT: v_xor_b32_e32 v8, 0x7f, v12 ; GFX9-G-NEXT: v_or_b32_e32 v16, v8, v14 ; GFX9-G-NEXT: v_and_b32_e32 v8, 1, v18 -; GFX9-G-NEXT: v_cmp_ne_u32_e32 vcc, 0, v8 -; GFX9-G-NEXT: v_cndmask_b32_e64 v10, v0, 0, vcc -; GFX9-G-NEXT: v_cndmask_b32_e64 v11, v1, 0, vcc -; GFX9-G-NEXT: v_cndmask_b32_e64 v8, v2, 0, vcc -; GFX9-G-NEXT: v_cndmask_b32_e64 v9, v3, 0, vcc +; GFX9-G-NEXT: v_cmp_eq_u32_e32 vcc, 0, v8 +; GFX9-G-NEXT: v_cndmask_b32_e32 v10, 0, v0, vcc +; GFX9-G-NEXT: v_cndmask_b32_e32 v11, 0, v1, vcc +; GFX9-G-NEXT: v_cndmask_b32_e32 v8, 0, v2, vcc +; GFX9-G-NEXT: v_cndmask_b32_e32 v9, 0, v3, vcc ; GFX9-G-NEXT: v_cmp_eq_u64_e32 vcc, 0, v[16:17] ; GFX9-G-NEXT: v_cndmask_b32_e64 v16, 0, 1, vcc ; GFX9-G-NEXT: v_or_b32_e32 v16, v18, v16 diff --git a/llvm/test/CodeGen/AMDGPU/div_v2i128.ll b/llvm/test/CodeGen/AMDGPU/div_v2i128.ll index 77b78f1f8a333..07d7276e3b944 100644 --- a/llvm/test/CodeGen/AMDGPU/div_v2i128.ll +++ 
b/llvm/test/CodeGen/AMDGPU/div_v2i128.ll @@ -495,13 +495,13 @@ define <2 x i128> @v_sdiv_v2i128_vv(<2 x i128> %lhs, <2 x i128> %rhs) { ; GISEL-NEXT: v_or_b32_e32 v8, v9, v8 ; GISEL-NEXT: v_and_b32_e32 v9, 1, v9 ; GISEL-NEXT: v_and_b32_e32 v8, 1, v8 -; GISEL-NEXT: v_cmp_ne_u32_e32 vcc, 0, v9 -; GISEL-NEXT: v_cndmask_b32_e64 v22, v18, 0, vcc +; GISEL-NEXT: v_cmp_eq_u32_e32 vcc, 0, v9 +; GISEL-NEXT: v_cndmask_b32_e32 v22, 0, v18, vcc ; GISEL-NEXT: v_cmp_ne_u32_e64 s[4:5], 0, v8 -; GISEL-NEXT: v_cndmask_b32_e64 v8, v20, 0, vcc -; GISEL-NEXT: v_cndmask_b32_e64 v9, v21, 0, vcc +; GISEL-NEXT: v_cndmask_b32_e32 v8, 0, v20, vcc +; GISEL-NEXT: v_cndmask_b32_e32 v9, 0, v21, vcc ; GISEL-NEXT: s_xor_b64 s[4:5], s[4:5], -1 -; GISEL-NEXT: v_cndmask_b32_e64 v23, v19, 0, vcc +; GISEL-NEXT: v_cndmask_b32_e32 v23, 0, v19, vcc ; GISEL-NEXT: s_and_saveexec_b64 s[6:7], s[4:5] ; GISEL-NEXT: s_cbranch_execz .LBB0_6 ; GISEL-NEXT: ; %bb.1: ; %udiv-bb15 @@ -685,12 +685,12 @@ define <2 x i128> @v_sdiv_v2i128_vv(<2 x i128> %lhs, <2 x i128> %rhs) { ; GISEL-NEXT: v_or_b32_e32 v11, v14, v15 ; GISEL-NEXT: v_and_b32_e32 v14, 1, v11 ; GISEL-NEXT: v_or_b32_e32 v10, v11, v10 -; GISEL-NEXT: v_cmp_ne_u32_e32 vcc, 0, v14 -; GISEL-NEXT: v_cndmask_b32_e64 v14, v6, 0, vcc +; GISEL-NEXT: v_cmp_eq_u32_e32 vcc, 0, v14 +; GISEL-NEXT: v_cndmask_b32_e32 v14, 0, v6, vcc ; GISEL-NEXT: v_and_b32_e32 v16, 1, v10 -; GISEL-NEXT: v_cndmask_b32_e64 v15, v7, 0, vcc -; GISEL-NEXT: v_cndmask_b32_e64 v10, v12, 0, vcc -; GISEL-NEXT: v_cndmask_b32_e64 v11, v13, 0, vcc +; GISEL-NEXT: v_cndmask_b32_e32 v15, 0, v7, vcc +; GISEL-NEXT: v_cndmask_b32_e32 v10, 0, v12, vcc +; GISEL-NEXT: v_cndmask_b32_e32 v11, 0, v13, vcc ; GISEL-NEXT: v_cmp_ne_u32_e32 vcc, 0, v16 ; GISEL-NEXT: s_xor_b64 s[4:5], vcc, -1 ; GISEL-NEXT: s_and_saveexec_b64 s[12:13], s[4:5] @@ -1251,13 +1251,13 @@ define <2 x i128> @v_udiv_v2i128_vv(<2 x i128> %lhs, <2 x i128> %rhs) { ; GISEL-NEXT: v_or_b32_e32 v2, v3, v2 ; GISEL-NEXT: v_and_b32_e32 v3, 1, v3 ; GISEL-NEXT: v_and_b32_e32 v2, 1, v2 -; GISEL-NEXT: v_cmp_ne_u32_e32 vcc, 0, v3 -; GISEL-NEXT: v_cndmask_b32_e64 v18, v0, 0, vcc +; GISEL-NEXT: v_cmp_eq_u32_e32 vcc, 0, v3 +; GISEL-NEXT: v_cndmask_b32_e32 v18, 0, v0, vcc ; GISEL-NEXT: v_cmp_ne_u32_e64 s[4:5], 0, v2 -; GISEL-NEXT: v_cndmask_b32_e64 v2, v16, 0, vcc -; GISEL-NEXT: v_cndmask_b32_e64 v3, v17, 0, vcc +; GISEL-NEXT: v_cndmask_b32_e32 v2, 0, v16, vcc +; GISEL-NEXT: v_cndmask_b32_e32 v3, 0, v17, vcc ; GISEL-NEXT: s_xor_b64 s[4:5], s[4:5], -1 -; GISEL-NEXT: v_cndmask_b32_e64 v19, v1, 0, vcc +; GISEL-NEXT: v_cndmask_b32_e32 v19, 0, v1, vcc ; GISEL-NEXT: s_and_saveexec_b64 s[12:13], s[4:5] ; GISEL-NEXT: s_cbranch_execz .LBB1_6 ; GISEL-NEXT: ; %bb.1: ; %udiv-bb15 @@ -1423,12 +1423,12 @@ define <2 x i128> @v_udiv_v2i128_vv(<2 x i128> %lhs, <2 x i128> %rhs) { ; GISEL-NEXT: v_or_b32_e32 v9, v20, v10 ; GISEL-NEXT: v_and_b32_e32 v10, 1, v9 ; GISEL-NEXT: v_or_b32_e32 v8, v9, v8 -; GISEL-NEXT: v_cmp_ne_u32_e32 vcc, 0, v10 -; GISEL-NEXT: v_cndmask_b32_e64 v10, v4, 0, vcc +; GISEL-NEXT: v_cmp_eq_u32_e32 vcc, 0, v10 +; GISEL-NEXT: v_cndmask_b32_e32 v10, 0, v4, vcc ; GISEL-NEXT: v_and_b32_e32 v20, 1, v8 -; GISEL-NEXT: v_cndmask_b32_e64 v11, v5, 0, vcc -; GISEL-NEXT: v_cndmask_b32_e64 v8, v6, 0, vcc -; GISEL-NEXT: v_cndmask_b32_e64 v9, v7, 0, vcc +; GISEL-NEXT: v_cndmask_b32_e32 v11, 0, v5, vcc +; GISEL-NEXT: v_cndmask_b32_e32 v8, 0, v6, vcc +; GISEL-NEXT: v_cndmask_b32_e32 v9, 0, v7, vcc ; GISEL-NEXT: v_cmp_ne_u32_e32 vcc, 0, v20 ; GISEL-NEXT: s_xor_b64 s[4:5], vcc, -1 ; GISEL-NEXT: 
s_and_saveexec_b64 s[12:13], s[4:5] @@ -2093,13 +2093,13 @@ define <2 x i128> @v_srem_v2i128_vv(<2 x i128> %lhs, <2 x i128> %rhs) { ; GISEL-NEXT: v_or_b32_e32 v18, v19, v18 ; GISEL-NEXT: v_and_b32_e32 v19, 1, v19 ; GISEL-NEXT: v_and_b32_e32 v18, 1, v18 -; GISEL-NEXT: v_cmp_ne_u32_e32 vcc, 0, v19 -; GISEL-NEXT: v_cndmask_b32_e64 v31, v16, 0, vcc +; GISEL-NEXT: v_cmp_eq_u32_e32 vcc, 0, v19 +; GISEL-NEXT: v_cndmask_b32_e32 v31, 0, v16, vcc ; GISEL-NEXT: v_cmp_ne_u32_e64 s[4:5], 0, v18 -; GISEL-NEXT: v_cndmask_b32_e64 v18, v8, 0, vcc -; GISEL-NEXT: v_cndmask_b32_e64 v19, v9, 0, vcc +; GISEL-NEXT: v_cndmask_b32_e32 v18, 0, v8, vcc +; GISEL-NEXT: v_cndmask_b32_e32 v19, 0, v9, vcc ; GISEL-NEXT: s_xor_b64 s[4:5], s[4:5], -1 -; GISEL-NEXT: v_cndmask_b32_e64 v32, v17, 0, vcc +; GISEL-NEXT: v_cndmask_b32_e32 v32, 0, v17, vcc ; GISEL-NEXT: s_and_saveexec_b64 s[12:13], s[4:5] ; GISEL-NEXT: s_cbranch_execz .LBB2_6 ; GISEL-NEXT: ; %bb.1: ; %udiv-bb15 @@ -2283,12 +2283,12 @@ define <2 x i128> @v_srem_v2i128_vv(<2 x i128> %lhs, <2 x i128> %rhs) { ; GISEL-NEXT: v_or_b32_e32 v3, v20, v21 ; GISEL-NEXT: v_and_b32_e32 v20, 1, v3 ; GISEL-NEXT: v_or_b32_e32 v2, v3, v2 -; GISEL-NEXT: v_cmp_ne_u32_e32 vcc, 0, v20 -; GISEL-NEXT: v_cndmask_b32_e64 v20, v12, 0, vcc +; GISEL-NEXT: v_cmp_eq_u32_e32 vcc, 0, v20 +; GISEL-NEXT: v_cndmask_b32_e32 v20, 0, v12, vcc ; GISEL-NEXT: v_and_b32_e32 v22, 1, v2 -; GISEL-NEXT: v_cndmask_b32_e64 v21, v13, 0, vcc -; GISEL-NEXT: v_cndmask_b32_e64 v2, v6, 0, vcc -; GISEL-NEXT: v_cndmask_b32_e64 v3, v7, 0, vcc +; GISEL-NEXT: v_cndmask_b32_e32 v21, 0, v13, vcc +; GISEL-NEXT: v_cndmask_b32_e32 v2, 0, v6, vcc +; GISEL-NEXT: v_cndmask_b32_e32 v3, 0, v7, vcc ; GISEL-NEXT: v_cmp_ne_u32_e32 vcc, 0, v22 ; GISEL-NEXT: s_xor_b64 s[4:5], vcc, -1 ; GISEL-NEXT: s_and_saveexec_b64 s[12:13], s[4:5] @@ -2920,13 +2920,13 @@ define <2 x i128> @v_urem_v2i128_vv(<2 x i128> %lhs, <2 x i128> %rhs) { ; GISEL-NEXT: v_or_b32_e32 v20, v21, v20 ; GISEL-NEXT: v_and_b32_e32 v21, 1, v21 ; GISEL-NEXT: v_and_b32_e32 v20, 1, v20 -; GISEL-NEXT: v_cmp_ne_u32_e32 vcc, 0, v21 -; GISEL-NEXT: v_cndmask_b32_e64 v32, v0, 0, vcc +; GISEL-NEXT: v_cmp_eq_u32_e32 vcc, 0, v21 +; GISEL-NEXT: v_cndmask_b32_e32 v32, 0, v0, vcc ; GISEL-NEXT: v_cmp_ne_u32_e64 s[4:5], 0, v20 -; GISEL-NEXT: v_cndmask_b32_e64 v20, v2, 0, vcc -; GISEL-NEXT: v_cndmask_b32_e64 v21, v3, 0, vcc +; GISEL-NEXT: v_cndmask_b32_e32 v20, 0, v2, vcc +; GISEL-NEXT: v_cndmask_b32_e32 v21, 0, v3, vcc ; GISEL-NEXT: s_xor_b64 s[4:5], s[4:5], -1 -; GISEL-NEXT: v_cndmask_b32_e64 v33, v1, 0, vcc +; GISEL-NEXT: v_cndmask_b32_e32 v33, 0, v1, vcc ; GISEL-NEXT: s_and_saveexec_b64 s[12:13], s[4:5] ; GISEL-NEXT: s_cbranch_execz .LBB3_6 ; GISEL-NEXT: ; %bb.1: ; %udiv-bb15 @@ -3092,12 +3092,12 @@ define <2 x i128> @v_urem_v2i128_vv(<2 x i128> %lhs, <2 x i128> %rhs) { ; GISEL-NEXT: v_or_b32_e32 v19, v26, v24 ; GISEL-NEXT: v_and_b32_e32 v24, 1, v19 ; GISEL-NEXT: v_or_b32_e32 v18, v19, v18 -; GISEL-NEXT: v_cmp_ne_u32_e32 vcc, 0, v24 -; GISEL-NEXT: v_cndmask_b32_e64 v24, v4, 0, vcc +; GISEL-NEXT: v_cmp_eq_u32_e32 vcc, 0, v24 +; GISEL-NEXT: v_cndmask_b32_e32 v24, 0, v4, vcc ; GISEL-NEXT: v_and_b32_e32 v26, 1, v18 -; GISEL-NEXT: v_cndmask_b32_e64 v25, v5, 0, vcc -; GISEL-NEXT: v_cndmask_b32_e64 v18, v6, 0, vcc -; GISEL-NEXT: v_cndmask_b32_e64 v19, v7, 0, vcc +; GISEL-NEXT: v_cndmask_b32_e32 v25, 0, v5, vcc +; GISEL-NEXT: v_cndmask_b32_e32 v18, 0, v6, vcc +; GISEL-NEXT: v_cndmask_b32_e32 v19, 0, v7, vcc ; GISEL-NEXT: v_cmp_ne_u32_e32 vcc, 0, v26 ; GISEL-NEXT: s_xor_b64 s[4:5], vcc, -1 ; 
GISEL-NEXT: s_and_saveexec_b64 s[12:13], s[4:5] diff --git a/llvm/test/CodeGen/AMDGPU/extract_vector_dynelt.ll b/llvm/test/CodeGen/AMDGPU/extract_vector_dynelt.ll index 10de973dac0c5..cd1426f868bce 100644 --- a/llvm/test/CodeGen/AMDGPU/extract_vector_dynelt.ll +++ b/llvm/test/CodeGen/AMDGPU/extract_vector_dynelt.ll @@ -1282,10 +1282,10 @@ define double @double16_extelt_vec(i32 %sel) { ; GCN-NEXT: v_cndmask_b32_e32 v3, v3, v5, vcc ; GCN-NEXT: s_or_b64 vcc, vcc, s[4:5] ; GCN-NEXT: v_cndmask_b32_e32 v2, v2, v4, vcc -; GCN-NEXT: v_cmp_eq_u32_e32 vcc, 15, v0 -; GCN-NEXT: v_cndmask_b32_e32 v0, v2, v1, vcc +; GCN-NEXT: v_cmp_ne_u32_e32 vcc, 15, v0 +; GCN-NEXT: v_cndmask_b32_e32 v0, v1, v2, vcc ; GCN-NEXT: v_mov_b32_e32 v1, 0x40301999 -; GCN-NEXT: v_cndmask_b32_e32 v1, v3, v1, vcc +; GCN-NEXT: v_cndmask_b32_e32 v1, v1, v3, vcc ; GCN-NEXT: s_setpc_b64 s[30:31] entry: %ext = extractelement <16 x double> , i32 %sel diff --git a/llvm/test/CodeGen/AMDGPU/fneg-combines.new.ll b/llvm/test/CodeGen/AMDGPU/fneg-combines.new.ll index 14f7cbcd0f438..1b471166b5d29 100644 --- a/llvm/test/CodeGen/AMDGPU/fneg-combines.new.ll +++ b/llvm/test/CodeGen/AMDGPU/fneg-combines.new.ll @@ -2836,9 +2836,9 @@ define float @v_fneg_select_infloop_regression_f32(float %arg, i1 %arg1) { ; GCN: ; %bb.0: ; GCN-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) ; GCN-NEXT: v_and_b32_e32 v1, 1, v1 -; GCN-NEXT: v_cmp_eq_u32_e32 vcc, 1, v1 -; GCN-NEXT: v_cndmask_b32_e64 v0, v0, 0, vcc -; GCN-NEXT: v_cndmask_b32_e64 v0, -v0, 0, vcc +; GCN-NEXT: v_cmp_ne_u32_e32 vcc, 1, v1 +; GCN-NEXT: v_cndmask_b32_e32 v0, 0, v0, vcc +; GCN-NEXT: v_cndmask_b32_e64 v0, 0, -v0, vcc ; GCN-NEXT: s_setpc_b64 s[30:31] %i = select i1 %arg1, float 0.0, float %arg %i2 = fneg float %i @@ -2897,9 +2897,9 @@ define float @v_fneg_select_infloop_regression_inline_imm_f32(float %arg, i1 %ar ; GCN: ; %bb.0: ; GCN-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) ; GCN-NEXT: v_and_b32_e32 v1, 1, v1 -; GCN-NEXT: v_cmp_eq_u32_e32 vcc, 1, v1 -; GCN-NEXT: v_cndmask_b32_e64 v0, v0, 2.0, vcc -; GCN-NEXT: v_cndmask_b32_e64 v0, -v0, 2.0, vcc +; GCN-NEXT: v_cmp_ne_u32_e32 vcc, 1, v1 +; GCN-NEXT: v_cndmask_b32_e32 v0, 2.0, v0, vcc +; GCN-NEXT: v_cndmask_b32_e64 v0, 2.0, -v0, vcc ; GCN-NEXT: s_setpc_b64 s[30:31] %i = select i1 %arg1, float 2.0, float %arg %i2 = fneg float %i @@ -2958,9 +2958,9 @@ define float @v_fneg_select_infloop_regression_neg_inline_imm_f32(float %arg, i1 ; GCN: ; %bb.0: ; GCN-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) ; GCN-NEXT: v_and_b32_e32 v1, 1, v1 -; GCN-NEXT: v_cmp_eq_u32_e32 vcc, 1, v1 -; GCN-NEXT: v_cndmask_b32_e64 v0, v0, -2.0, vcc -; GCN-NEXT: v_cndmask_b32_e64 v0, -v0, -2.0, vcc +; GCN-NEXT: v_cmp_ne_u32_e32 vcc, 1, v1 +; GCN-NEXT: v_cndmask_b32_e32 v0, -2.0, v0, vcc +; GCN-NEXT: v_cndmask_b32_e64 v0, -2.0, -v0, vcc ; GCN-NEXT: s_setpc_b64 s[30:31] %i = select i1 %arg1, float -2.0, float %arg %i2 = fneg float %i @@ -3068,8 +3068,9 @@ define double @v_fneg_select_infloop_regression_f64(double %arg, i1 %arg1) { ; GCN-NEXT: v_bfrev_b32_e32 v3, 1 ; GCN-NEXT: v_cmp_eq_u32_e32 vcc, 1, v2 ; GCN-NEXT: v_cndmask_b32_e64 v1, -v1, v3, vcc -; GCN-NEXT: v_cndmask_b32_e64 v0, v0, 0, vcc -; GCN-NEXT: v_cndmask_b32_e64 v1, v1, 0, vcc +; GCN-NEXT: v_cmp_ne_u32_e32 vcc, 1, v2 +; GCN-NEXT: v_cndmask_b32_e32 v0, 0, v0, vcc +; GCN-NEXT: v_cndmask_b32_e32 v1, 0, v1, vcc ; GCN-NEXT: s_setpc_b64 s[30:31] %i = select i1 %arg1, double 0.0, double %arg %i2 = fneg double %i @@ -3122,20 +3123,20 @@ define half @v_fneg_select_infloop_regression_f16(half %arg, i1 %arg1) { ; SI-NEXT: 
s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) ; SI-NEXT: v_cvt_f16_f32_e32 v0, v0 ; SI-NEXT: v_and_b32_e32 v1, 1, v1 -; SI-NEXT: v_cmp_eq_u32_e32 vcc, 1, v1 +; SI-NEXT: v_cmp_ne_u32_e32 vcc, 1, v1 ; SI-NEXT: v_cvt_f32_f16_e32 v0, v0 -; SI-NEXT: v_cndmask_b32_e64 v0, v0, 0, vcc -; SI-NEXT: v_cndmask_b32_e64 v0, -v0, 0, vcc +; SI-NEXT: v_cndmask_b32_e32 v0, 0, v0, vcc +; SI-NEXT: v_cndmask_b32_e64 v0, 0, -v0, vcc ; SI-NEXT: s_setpc_b64 s[30:31] ; ; VI-LABEL: v_fneg_select_infloop_regression_f16: ; VI: ; %bb.0: ; VI-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) ; VI-NEXT: v_and_b32_e32 v1, 1, v1 -; VI-NEXT: v_cmp_eq_u32_e32 vcc, 1, v1 -; VI-NEXT: v_cndmask_b32_e64 v0, v0, 0, vcc +; VI-NEXT: v_cmp_ne_u32_e32 vcc, 1, v1 +; VI-NEXT: v_cndmask_b32_e32 v0, 0, v0, vcc ; VI-NEXT: v_xor_b32_e32 v0, 0x8000, v0 -; VI-NEXT: v_cndmask_b32_e64 v0, v0, 0, vcc +; VI-NEXT: v_cndmask_b32_e32 v0, 0, v0, vcc ; VI-NEXT: s_setpc_b64 s[30:31] %i = select i1 %arg1, half 0.0, half %arg %i2 = fneg half %i @@ -3189,10 +3190,10 @@ define <2 x half> @v_fneg_select_infloop_regression_v2f16(<2 x half> %arg, i1 %a ; SI-NEXT: v_lshlrev_b32_e32 v1, 16, v1 ; SI-NEXT: v_or_b32_e32 v0, v0, v1 ; SI-NEXT: v_and_b32_e32 v1, 1, v2 -; SI-NEXT: v_cmp_eq_u32_e32 vcc, 1, v1 -; SI-NEXT: v_cndmask_b32_e64 v0, v0, 0, vcc +; SI-NEXT: v_cmp_ne_u32_e32 vcc, 1, v1 +; SI-NEXT: v_cndmask_b32_e32 v0, 0, v0, vcc ; SI-NEXT: v_xor_b32_e32 v0, 0x80008000, v0 -; SI-NEXT: v_cndmask_b32_e64 v1, v0, 0, vcc +; SI-NEXT: v_cndmask_b32_e32 v1, 0, v0, vcc ; SI-NEXT: v_cvt_f32_f16_e32 v0, v1 ; SI-NEXT: v_lshrrev_b32_e32 v1, 16, v1 ; SI-NEXT: v_cvt_f32_f16_e32 v1, v1 @@ -3202,10 +3203,10 @@ define <2 x half> @v_fneg_select_infloop_regression_v2f16(<2 x half> %arg, i1 %a ; VI: ; %bb.0: ; VI-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) ; VI-NEXT: v_and_b32_e32 v1, 1, v1 -; VI-NEXT: v_cmp_eq_u32_e32 vcc, 1, v1 -; VI-NEXT: v_cndmask_b32_e64 v0, v0, 0, vcc +; VI-NEXT: v_cmp_ne_u32_e32 vcc, 1, v1 +; VI-NEXT: v_cndmask_b32_e32 v0, 0, v0, vcc ; VI-NEXT: v_xor_b32_e32 v0, 0x80008000, v0 -; VI-NEXT: v_cndmask_b32_e64 v0, v0, 0, vcc +; VI-NEXT: v_cndmask_b32_e32 v0, 0, v0, vcc ; VI-NEXT: s_setpc_b64 s[30:31] %i = select i1 %arg1, <2 x half> zeroinitializer, <2 x half> %arg %i2 = fneg <2 x half> %i @@ -3264,11 +3265,11 @@ define <2 x float> @v_fneg_select_infloop_regression_v2f32(<2 x float> %arg, i1 ; GCN-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) ; GCN-NEXT: v_and_b32_e32 v2, 1, v2 ; GCN-NEXT: v_bfrev_b32_e32 v3, 1 -; GCN-NEXT: v_cmp_eq_u32_e32 vcc, 1, v2 -; GCN-NEXT: v_cndmask_b32_e64 v1, -v1, v3, vcc -; GCN-NEXT: v_cndmask_b32_e64 v0, -v0, v3, vcc -; GCN-NEXT: v_cndmask_b32_e64 v0, v0, 0, vcc -; GCN-NEXT: v_cndmask_b32_e64 v1, v1, 0, vcc +; GCN-NEXT: v_cmp_ne_u32_e32 vcc, 1, v2 +; GCN-NEXT: v_cndmask_b32_e64 v1, v3, -v1, vcc +; GCN-NEXT: v_cndmask_b32_e64 v0, v3, -v0, vcc +; GCN-NEXT: v_cndmask_b32_e32 v0, 0, v0, vcc +; GCN-NEXT: v_cndmask_b32_e32 v1, 0, v1, vcc ; GCN-NEXT: s_setpc_b64 s[30:31] %i = select i1 %arg1, <2 x float> zeroinitializer, <2 x float> %arg %i2 = fneg <2 x float> %i @@ -3316,9 +3317,9 @@ define float @v_fabs_select_infloop_regression_f32(float %arg, i1 %arg1) { ; GCN: ; %bb.0: ; GCN-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) ; GCN-NEXT: v_and_b32_e32 v1, 1, v1 -; GCN-NEXT: v_cmp_eq_u32_e32 vcc, 1, v1 -; GCN-NEXT: v_cndmask_b32_e64 v0, v0, 0, vcc -; GCN-NEXT: v_cndmask_b32_e64 v0, |v0|, 0, vcc +; GCN-NEXT: v_cmp_ne_u32_e32 vcc, 1, v1 +; GCN-NEXT: v_cndmask_b32_e32 v0, 0, v0, vcc +; GCN-NEXT: v_cndmask_b32_e64 v0, 0, |v0|, vcc ; GCN-NEXT: 
s_setpc_b64 s[30:31] %i = select i1 %arg1, float 0.0, float %arg %i2 = call float @llvm.fabs.f32(float %i) @@ -3367,9 +3368,9 @@ define float @v_fneg_fabs_select_infloop_regression(float %arg, i1 %arg1) { ; GCN: ; %bb.0: ; GCN-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) ; GCN-NEXT: v_and_b32_e32 v1, 1, v1 -; GCN-NEXT: v_cmp_eq_u32_e32 vcc, 1, v1 -; GCN-NEXT: v_cndmask_b32_e64 v0, v0, 0, vcc -; GCN-NEXT: v_cndmask_b32_e64 v0, -|v0|, 0, vcc +; GCN-NEXT: v_cmp_ne_u32_e32 vcc, 1, v1 +; GCN-NEXT: v_cndmask_b32_e32 v0, 0, v0, vcc +; GCN-NEXT: v_cndmask_b32_e64 v0, 0, -|v0|, vcc ; GCN-NEXT: s_setpc_b64 s[30:31] %i = select i1 %arg1, float 0.0, float %arg %i2 = call float @llvm.fabs.f32(float %i) diff --git a/llvm/test/CodeGen/AMDGPU/fptoi.i128.ll b/llvm/test/CodeGen/AMDGPU/fptoi.i128.ll index 3465c782bd700..0ff60af86135b 100644 --- a/llvm/test/CodeGen/AMDGPU/fptoi.i128.ll +++ b/llvm/test/CodeGen/AMDGPU/fptoi.i128.ll @@ -241,10 +241,10 @@ define i128 @fptosi_f64_to_i128(double %x) { ; GISEL-NEXT: v_sub_u32_e32 v2, 64, v7 ; GISEL-NEXT: v_lshrrev_b64 v[2:3], v2, v[4:5] ; GISEL-NEXT: v_lshlrev_b64 v[4:5], v6, v[4:5] -; GISEL-NEXT: v_cmp_eq_u32_e64 s[6:7], 0, v7 +; GISEL-NEXT: v_cmp_ne_u32_e64 s[6:7], 0, v7 ; GISEL-NEXT: v_mad_u64_u32 v[6:7], s[8:9], v11, v9, v[0:1] ; GISEL-NEXT: v_cndmask_b32_e32 v2, v4, v2, vcc -; GISEL-NEXT: v_cndmask_b32_e64 v12, v2, 0, s[6:7] +; GISEL-NEXT: v_cndmask_b32_e64 v12, 0, v2, s[6:7] ; GISEL-NEXT: v_mad_u64_u32 v[6:7], s[8:9], v12, v8, v[6:7] ; GISEL-NEXT: v_mad_u64_u32 v[0:1], s[8:9], v10, v8, 0 ; GISEL-NEXT: v_mov_b32_e32 v2, v6 @@ -256,7 +256,7 @@ define i128 @fptosi_f64_to_i128(double %x) { ; GISEL-NEXT: v_addc_co_u32_e64 v6, s[10:11], v7, v6, s[10:11] ; GISEL-NEXT: v_addc_co_u32_e64 v4, s[8:9], v6, v4, s[8:9] ; GISEL-NEXT: v_mad_u64_u32 v[6:7], s[8:9], v12, v9, v[4:5] -; GISEL-NEXT: v_cndmask_b32_e64 v3, v3, 0, s[6:7] +; GISEL-NEXT: v_cndmask_b32_e64 v3, 0, v3, s[6:7] ; GISEL-NEXT: ; implicit-def: $vgpr9 ; GISEL-NEXT: v_mad_u64_u32 v[3:4], s[6:7], v3, v8, v[6:7] ; GISEL-NEXT: ; implicit-def: $vgpr6 @@ -605,10 +605,10 @@ define i128 @fptoui_f64_to_i128(double %x) { ; GISEL-NEXT: v_sub_u32_e32 v2, 64, v7 ; GISEL-NEXT: v_lshrrev_b64 v[2:3], v2, v[4:5] ; GISEL-NEXT: v_lshlrev_b64 v[4:5], v6, v[4:5] -; GISEL-NEXT: v_cmp_eq_u32_e64 s[6:7], 0, v7 +; GISEL-NEXT: v_cmp_ne_u32_e64 s[6:7], 0, v7 ; GISEL-NEXT: v_mad_u64_u32 v[6:7], s[8:9], v11, v9, v[0:1] ; GISEL-NEXT: v_cndmask_b32_e32 v2, v4, v2, vcc -; GISEL-NEXT: v_cndmask_b32_e64 v12, v2, 0, s[6:7] +; GISEL-NEXT: v_cndmask_b32_e64 v12, 0, v2, s[6:7] ; GISEL-NEXT: v_mad_u64_u32 v[6:7], s[8:9], v12, v8, v[6:7] ; GISEL-NEXT: v_mad_u64_u32 v[0:1], s[8:9], v10, v8, 0 ; GISEL-NEXT: v_mov_b32_e32 v2, v6 @@ -620,7 +620,7 @@ define i128 @fptoui_f64_to_i128(double %x) { ; GISEL-NEXT: v_addc_co_u32_e64 v6, s[10:11], v7, v6, s[10:11] ; GISEL-NEXT: v_addc_co_u32_e64 v4, s[8:9], v6, v4, s[8:9] ; GISEL-NEXT: v_mad_u64_u32 v[6:7], s[8:9], v12, v9, v[4:5] -; GISEL-NEXT: v_cndmask_b32_e64 v3, v3, 0, s[6:7] +; GISEL-NEXT: v_cndmask_b32_e64 v3, 0, v3, s[6:7] ; GISEL-NEXT: ; implicit-def: $vgpr9 ; GISEL-NEXT: v_mad_u64_u32 v[3:4], s[6:7], v3, v8, v[6:7] ; GISEL-NEXT: ; implicit-def: $vgpr6 @@ -962,10 +962,10 @@ define i128 @fptosi_f32_to_i128(float %x) { ; GISEL-NEXT: v_sub_u32_e32 v2, 64, v7 ; GISEL-NEXT: v_lshrrev_b64 v[2:3], v2, v[4:5] ; GISEL-NEXT: v_lshlrev_b64 v[4:5], v6, v[4:5] -; GISEL-NEXT: v_cmp_eq_u32_e64 s[6:7], 0, v7 +; GISEL-NEXT: v_cmp_ne_u32_e64 s[6:7], 0, v7 ; GISEL-NEXT: v_mad_u64_u32 v[6:7], s[8:9], v11, v8, v[0:1] ; 
GISEL-NEXT: v_cndmask_b32_e32 v2, v4, v2, vcc -; GISEL-NEXT: v_cndmask_b32_e64 v12, v2, 0, s[6:7] +; GISEL-NEXT: v_cndmask_b32_e64 v12, 0, v2, s[6:7] ; GISEL-NEXT: v_mad_u64_u32 v[6:7], s[8:9], v12, v9, v[6:7] ; GISEL-NEXT: v_mad_u64_u32 v[0:1], s[8:9], v10, v9, 0 ; GISEL-NEXT: v_mov_b32_e32 v2, v6 @@ -977,7 +977,7 @@ define i128 @fptosi_f32_to_i128(float %x) { ; GISEL-NEXT: v_addc_co_u32_e64 v6, s[10:11], v7, v6, s[10:11] ; GISEL-NEXT: v_addc_co_u32_e64 v4, s[8:9], v6, v4, s[8:9] ; GISEL-NEXT: v_mad_u64_u32 v[6:7], s[8:9], v12, v8, v[4:5] -; GISEL-NEXT: v_cndmask_b32_e64 v3, v3, 0, s[6:7] +; GISEL-NEXT: v_cndmask_b32_e64 v3, 0, v3, s[6:7] ; GISEL-NEXT: ; implicit-def: $vgpr8 ; GISEL-NEXT: v_mad_u64_u32 v[3:4], s[6:7], v3, v9, v[6:7] ; GISEL-NEXT: ; implicit-def: $vgpr6 @@ -1313,10 +1313,10 @@ define i128 @fptoui_f32_to_i128(float %x) { ; GISEL-NEXT: v_sub_u32_e32 v2, 64, v7 ; GISEL-NEXT: v_lshrrev_b64 v[2:3], v2, v[4:5] ; GISEL-NEXT: v_lshlrev_b64 v[4:5], v6, v[4:5] -; GISEL-NEXT: v_cmp_eq_u32_e64 s[6:7], 0, v7 +; GISEL-NEXT: v_cmp_ne_u32_e64 s[6:7], 0, v7 ; GISEL-NEXT: v_mad_u64_u32 v[6:7], s[8:9], v11, v8, v[0:1] ; GISEL-NEXT: v_cndmask_b32_e32 v2, v4, v2, vcc -; GISEL-NEXT: v_cndmask_b32_e64 v12, v2, 0, s[6:7] +; GISEL-NEXT: v_cndmask_b32_e64 v12, 0, v2, s[6:7] ; GISEL-NEXT: v_mad_u64_u32 v[6:7], s[8:9], v12, v9, v[6:7] ; GISEL-NEXT: v_mad_u64_u32 v[0:1], s[8:9], v10, v9, 0 ; GISEL-NEXT: v_mov_b32_e32 v2, v6 @@ -1328,7 +1328,7 @@ define i128 @fptoui_f32_to_i128(float %x) { ; GISEL-NEXT: v_addc_co_u32_e64 v6, s[10:11], v7, v6, s[10:11] ; GISEL-NEXT: v_addc_co_u32_e64 v4, s[8:9], v6, v4, s[8:9] ; GISEL-NEXT: v_mad_u64_u32 v[6:7], s[8:9], v12, v8, v[4:5] -; GISEL-NEXT: v_cndmask_b32_e64 v3, v3, 0, s[6:7] +; GISEL-NEXT: v_cndmask_b32_e64 v3, 0, v3, s[6:7] ; GISEL-NEXT: ; implicit-def: $vgpr8 ; GISEL-NEXT: v_mad_u64_u32 v[3:4], s[6:7], v3, v9, v[6:7] ; GISEL-NEXT: ; implicit-def: $vgpr6 @@ -1692,8 +1692,8 @@ define i128 @fptosi_bf16_to_i128(bfloat %x) { ; GISEL-NEXT: v_lshlrev_b64 v[4:5], v4, v[6:7] ; GISEL-NEXT: v_mad_u64_u32 v[6:7], s[8:9], v12, v9, v[0:1] ; GISEL-NEXT: v_cndmask_b32_e32 v2, v4, v2, vcc -; GISEL-NEXT: v_cmp_eq_u32_e64 s[6:7], 0, v10 -; GISEL-NEXT: v_cndmask_b32_e64 v10, v2, 0, s[6:7] +; GISEL-NEXT: v_cmp_ne_u32_e64 s[6:7], 0, v10 +; GISEL-NEXT: v_cndmask_b32_e64 v10, 0, v2, s[6:7] ; GISEL-NEXT: v_mad_u64_u32 v[6:7], s[8:9], v10, v8, v[6:7] ; GISEL-NEXT: v_mad_u64_u32 v[0:1], s[8:9], v11, v8, 0 ; GISEL-NEXT: v_mov_b32_e32 v2, v6 @@ -1705,7 +1705,7 @@ define i128 @fptosi_bf16_to_i128(bfloat %x) { ; GISEL-NEXT: v_addc_co_u32_e64 v6, s[10:11], v7, v6, s[10:11] ; GISEL-NEXT: v_addc_co_u32_e64 v4, s[8:9], v6, v4, s[8:9] ; GISEL-NEXT: v_mad_u64_u32 v[6:7], s[8:9], v10, v9, v[4:5] -; GISEL-NEXT: v_cndmask_b32_e64 v3, v3, 0, s[6:7] +; GISEL-NEXT: v_cndmask_b32_e64 v3, 0, v3, s[6:7] ; GISEL-NEXT: ; implicit-def: $vgpr5 ; GISEL-NEXT: v_mad_u64_u32 v[3:4], s[6:7], v3, v8, v[6:7] ; GISEL-NEXT: ; implicit-def: $vgpr6_vgpr7 @@ -2039,8 +2039,8 @@ define i128 @fptoui_bf16_to_i128(bfloat %x) { ; GISEL-NEXT: v_lshlrev_b64 v[4:5], v4, v[6:7] ; GISEL-NEXT: v_mad_u64_u32 v[6:7], s[8:9], v12, v9, v[0:1] ; GISEL-NEXT: v_cndmask_b32_e32 v2, v4, v2, vcc -; GISEL-NEXT: v_cmp_eq_u32_e64 s[6:7], 0, v10 -; GISEL-NEXT: v_cndmask_b32_e64 v10, v2, 0, s[6:7] +; GISEL-NEXT: v_cmp_ne_u32_e64 s[6:7], 0, v10 +; GISEL-NEXT: v_cndmask_b32_e64 v10, 0, v2, s[6:7] ; GISEL-NEXT: v_mad_u64_u32 v[6:7], s[8:9], v10, v8, v[6:7] ; GISEL-NEXT: v_mad_u64_u32 v[0:1], s[8:9], v11, v8, 0 ; GISEL-NEXT: v_mov_b32_e32 v2, v6 @@ 
-2052,7 +2052,7 @@ define i128 @fptoui_bf16_to_i128(bfloat %x) { ; GISEL-NEXT: v_addc_co_u32_e64 v6, s[10:11], v7, v6, s[10:11] ; GISEL-NEXT: v_addc_co_u32_e64 v4, s[8:9], v6, v4, s[8:9] ; GISEL-NEXT: v_mad_u64_u32 v[6:7], s[8:9], v10, v9, v[4:5] -; GISEL-NEXT: v_cndmask_b32_e64 v3, v3, 0, s[6:7] +; GISEL-NEXT: v_cndmask_b32_e64 v3, 0, v3, s[6:7] ; GISEL-NEXT: ; implicit-def: $vgpr5 ; GISEL-NEXT: v_mad_u64_u32 v[3:4], s[6:7], v3, v8, v[6:7] ; GISEL-NEXT: ; implicit-def: $vgpr6_vgpr7 diff --git a/llvm/test/CodeGen/AMDGPU/insert_vector_dynelt.ll b/llvm/test/CodeGen/AMDGPU/insert_vector_dynelt.ll index 4b9da7b49e997..e649c3034f35b 100644 --- a/llvm/test/CodeGen/AMDGPU/insert_vector_dynelt.ll +++ b/llvm/test/CodeGen/AMDGPU/insert_vector_dynelt.ll @@ -1921,30 +1921,31 @@ define <8 x double> @double8_inselt_vec(<8 x double> %vec, i32 %sel) { ; GCN: ; %bb.0: ; %entry ; GCN-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) ; GCN-NEXT: v_cmp_eq_u32_e32 vcc, 0, v16 -; GCN-NEXT: v_mov_b32_e32 v17, 0x3ff00000 ; GCN-NEXT: v_cndmask_b32_e64 v0, v0, 0, vcc -; GCN-NEXT: v_cndmask_b32_e32 v1, v1, v17, vcc -; GCN-NEXT: v_cmp_eq_u32_e32 vcc, 1, v16 -; GCN-NEXT: v_cndmask_b32_e64 v2, v2, 0, vcc -; GCN-NEXT: v_cndmask_b32_e32 v3, v3, v17, vcc -; GCN-NEXT: v_cmp_eq_u32_e32 vcc, 2, v16 -; GCN-NEXT: v_cndmask_b32_e64 v4, v4, 0, vcc -; GCN-NEXT: v_cndmask_b32_e32 v5, v5, v17, vcc -; GCN-NEXT: v_cmp_eq_u32_e32 vcc, 3, v16 -; GCN-NEXT: v_cndmask_b32_e64 v6, v6, 0, vcc -; GCN-NEXT: v_cndmask_b32_e32 v7, v7, v17, vcc -; GCN-NEXT: v_cmp_eq_u32_e32 vcc, 4, v16 -; GCN-NEXT: v_cndmask_b32_e64 v8, v8, 0, vcc -; GCN-NEXT: v_cndmask_b32_e32 v9, v9, v17, vcc -; GCN-NEXT: v_cmp_eq_u32_e32 vcc, 5, v16 -; GCN-NEXT: v_cndmask_b32_e64 v10, v10, 0, vcc -; GCN-NEXT: v_cndmask_b32_e32 v11, v11, v17, vcc -; GCN-NEXT: v_cmp_eq_u32_e32 vcc, 6, v16 -; GCN-NEXT: v_cndmask_b32_e64 v12, v12, 0, vcc -; GCN-NEXT: v_cndmask_b32_e32 v13, v13, v17, vcc -; GCN-NEXT: v_cmp_eq_u32_e32 vcc, 7, v16 -; GCN-NEXT: v_cndmask_b32_e64 v14, v14, 0, vcc -; GCN-NEXT: v_cndmask_b32_e32 v15, v15, v17, vcc +; GCN-NEXT: v_mov_b32_e32 v17, 0x3ff00000 +; GCN-NEXT: v_cmp_ne_u32_e32 vcc, 0, v16 +; GCN-NEXT: v_cndmask_b32_e32 v1, v17, v1, vcc +; GCN-NEXT: v_cmp_ne_u32_e32 vcc, 1, v16 +; GCN-NEXT: v_cndmask_b32_e32 v2, 0, v2, vcc +; GCN-NEXT: v_cndmask_b32_e32 v3, v17, v3, vcc +; GCN-NEXT: v_cmp_ne_u32_e32 vcc, 2, v16 +; GCN-NEXT: v_cndmask_b32_e32 v4, 0, v4, vcc +; GCN-NEXT: v_cndmask_b32_e32 v5, v17, v5, vcc +; GCN-NEXT: v_cmp_ne_u32_e32 vcc, 3, v16 +; GCN-NEXT: v_cndmask_b32_e32 v6, 0, v6, vcc +; GCN-NEXT: v_cndmask_b32_e32 v7, v17, v7, vcc +; GCN-NEXT: v_cmp_ne_u32_e32 vcc, 4, v16 +; GCN-NEXT: v_cndmask_b32_e32 v8, 0, v8, vcc +; GCN-NEXT: v_cndmask_b32_e32 v9, v17, v9, vcc +; GCN-NEXT: v_cmp_ne_u32_e32 vcc, 5, v16 +; GCN-NEXT: v_cndmask_b32_e32 v10, 0, v10, vcc +; GCN-NEXT: v_cndmask_b32_e32 v11, v17, v11, vcc +; GCN-NEXT: v_cmp_ne_u32_e32 vcc, 6, v16 +; GCN-NEXT: v_cndmask_b32_e32 v12, 0, v12, vcc +; GCN-NEXT: v_cndmask_b32_e32 v13, v17, v13, vcc +; GCN-NEXT: v_cmp_ne_u32_e32 vcc, 7, v16 +; GCN-NEXT: v_cndmask_b32_e32 v14, 0, v14, vcc +; GCN-NEXT: v_cndmask_b32_e32 v15, v17, v15, vcc ; GCN-NEXT: s_setpc_b64 s[30:31] entry: %v = insertelement <8 x double> %vec, double 1.000000e+00, i32 %sel diff --git a/llvm/test/CodeGen/AMDGPU/itofp.i128.ll b/llvm/test/CodeGen/AMDGPU/itofp.i128.ll index c316ec71863d0..6bfeda6a1a9e5 100644 --- a/llvm/test/CodeGen/AMDGPU/itofp.i128.ll +++ b/llvm/test/CodeGen/AMDGPU/itofp.i128.ll @@ -214,11 +214,11 @@ define float 
@sitofp_i128_to_f32(i128 %x) { ; GISEL-NEXT: v_cmp_gt_u32_e32 vcc, 64, v14 ; GISEL-NEXT: v_cndmask_b32_e32 v5, v11, v15, vcc ; GISEL-NEXT: v_cndmask_b32_e32 v11, v12, v16, vcc -; GISEL-NEXT: v_cmp_eq_u32_e64 s[4:5], 0, v14 +; GISEL-NEXT: v_cmp_ne_u32_e64 s[4:5], 0, v14 ; GISEL-NEXT: v_cndmask_b32_e32 v9, 0, v9, vcc ; GISEL-NEXT: v_cndmask_b32_e32 v10, 0, v10, vcc -; GISEL-NEXT: v_cndmask_b32_e64 v5, v5, -1, s[4:5] -; GISEL-NEXT: v_cndmask_b32_e64 v11, v11, -1, s[4:5] +; GISEL-NEXT: v_cndmask_b32_e64 v5, -1, v5, s[4:5] +; GISEL-NEXT: v_cndmask_b32_e64 v11, -1, v11, s[4:5] ; GISEL-NEXT: v_and_b32_e32 v2, v9, v2 ; GISEL-NEXT: v_and_b32_e32 v3, v10, v3 ; GISEL-NEXT: v_and_or_b32 v0, v5, v0, v2 @@ -459,11 +459,11 @@ define float @uitofp_i128_to_f32(i128 %x) { ; GISEL-NEXT: v_cmp_gt_u32_e32 vcc, 64, v13 ; GISEL-NEXT: v_cndmask_b32_e32 v5, v10, v14, vcc ; GISEL-NEXT: v_cndmask_b32_e32 v10, v11, v15, vcc -; GISEL-NEXT: v_cmp_eq_u32_e64 s[4:5], 0, v13 +; GISEL-NEXT: v_cmp_ne_u32_e64 s[4:5], 0, v13 ; GISEL-NEXT: v_cndmask_b32_e32 v8, 0, v8, vcc ; GISEL-NEXT: v_cndmask_b32_e32 v9, 0, v9, vcc -; GISEL-NEXT: v_cndmask_b32_e64 v5, v5, -1, s[4:5] -; GISEL-NEXT: v_cndmask_b32_e64 v10, v10, -1, s[4:5] +; GISEL-NEXT: v_cndmask_b32_e64 v5, -1, v5, s[4:5] +; GISEL-NEXT: v_cndmask_b32_e64 v10, -1, v10, s[4:5] ; GISEL-NEXT: v_and_b32_e32 v2, v8, v2 ; GISEL-NEXT: v_and_b32_e32 v3, v9, v3 ; GISEL-NEXT: v_and_or_b32 v0, v5, v0, v2 @@ -746,11 +746,11 @@ define double @sitofp_i128_to_f64(i128 %x) { ; GISEL-NEXT: v_cmp_gt_u32_e32 vcc, 64, v15 ; GISEL-NEXT: v_cndmask_b32_e32 v9, v12, v16, vcc ; GISEL-NEXT: v_cndmask_b32_e32 v12, v13, v17, vcc -; GISEL-NEXT: v_cmp_eq_u32_e64 s[4:5], 0, v15 +; GISEL-NEXT: v_cmp_ne_u32_e64 s[4:5], 0, v15 ; GISEL-NEXT: v_cndmask_b32_e32 v0, 0, v0, vcc ; GISEL-NEXT: v_cndmask_b32_e32 v1, 0, v1, vcc -; GISEL-NEXT: v_cndmask_b32_e64 v9, v9, -1, s[4:5] -; GISEL-NEXT: v_cndmask_b32_e64 v12, v12, -1, s[4:5] +; GISEL-NEXT: v_cndmask_b32_e64 v9, -1, v9, s[4:5] +; GISEL-NEXT: v_cndmask_b32_e64 v12, -1, v12, s[4:5] ; GISEL-NEXT: v_and_b32_e32 v0, v0, v4 ; GISEL-NEXT: v_and_b32_e32 v1, v1, v5 ; GISEL-NEXT: v_and_or_b32 v0, v9, v2, v0 @@ -1023,11 +1023,11 @@ define double @uitofp_i128_to_f64(i128 %x) { ; GISEL-NEXT: v_cmp_gt_u32_e32 vcc, 64, v15 ; GISEL-NEXT: v_cndmask_b32_e32 v8, v12, v16, vcc ; GISEL-NEXT: v_cndmask_b32_e32 v12, v13, v17, vcc -; GISEL-NEXT: v_cmp_eq_u32_e64 s[4:5], 0, v15 +; GISEL-NEXT: v_cmp_ne_u32_e64 s[4:5], 0, v15 ; GISEL-NEXT: v_cndmask_b32_e32 v4, 0, v4, vcc ; GISEL-NEXT: v_cndmask_b32_e32 v5, 0, v5, vcc -; GISEL-NEXT: v_cndmask_b32_e64 v8, v8, -1, s[4:5] -; GISEL-NEXT: v_cndmask_b32_e64 v12, v12, -1, s[4:5] +; GISEL-NEXT: v_cndmask_b32_e64 v8, -1, v8, s[4:5] +; GISEL-NEXT: v_cndmask_b32_e64 v12, -1, v12, s[4:5] ; GISEL-NEXT: v_and_b32_e32 v2, v4, v2 ; GISEL-NEXT: v_and_b32_e32 v3, v5, v3 ; GISEL-NEXT: v_and_or_b32 v0, v8, v0, v2 @@ -1305,11 +1305,11 @@ define half @sitofp_i128_to_f16(i128 %x) { ; GISEL-NEXT: v_cmp_gt_u32_e32 vcc, 64, v14 ; GISEL-NEXT: v_cndmask_b32_e32 v5, v11, v15, vcc ; GISEL-NEXT: v_cndmask_b32_e32 v11, v12, v16, vcc -; GISEL-NEXT: v_cmp_eq_u32_e64 s[4:5], 0, v14 +; GISEL-NEXT: v_cmp_ne_u32_e64 s[4:5], 0, v14 ; GISEL-NEXT: v_cndmask_b32_e32 v9, 0, v9, vcc ; GISEL-NEXT: v_cndmask_b32_e32 v10, 0, v10, vcc -; GISEL-NEXT: v_cndmask_b32_e64 v5, v5, -1, s[4:5] -; GISEL-NEXT: v_cndmask_b32_e64 v11, v11, -1, s[4:5] +; GISEL-NEXT: v_cndmask_b32_e64 v5, -1, v5, s[4:5] +; GISEL-NEXT: v_cndmask_b32_e64 v11, -1, v11, s[4:5] ; GISEL-NEXT: v_and_b32_e32 v2, v9, v2 ; 
GISEL-NEXT: v_and_b32_e32 v3, v10, v3
 ; GISEL-NEXT: v_and_or_b32 v0, v5, v0, v2
@@ -1552,11 +1552,11 @@ define half @uitofp_i128_to_f16(i128 %x) {
 ; GISEL-NEXT: v_cmp_gt_u32_e32 vcc, 64, v13
 ; GISEL-NEXT: v_cndmask_b32_e32 v5, v10, v14, vcc
 ; GISEL-NEXT: v_cndmask_b32_e32 v10, v11, v15, vcc
-; GISEL-NEXT: v_cmp_eq_u32_e64 s[4:5], 0, v13
+; GISEL-NEXT: v_cmp_ne_u32_e64 s[4:5], 0, v13
 ; GISEL-NEXT: v_cndmask_b32_e32 v8, 0, v8, vcc
 ; GISEL-NEXT: v_cndmask_b32_e32 v9, 0, v9, vcc
-; GISEL-NEXT: v_cndmask_b32_e64 v5, v5, -1, s[4:5]
-; GISEL-NEXT: v_cndmask_b32_e64 v10, v10, -1, s[4:5]
+; GISEL-NEXT: v_cndmask_b32_e64 v5, -1, v5, s[4:5]
+; GISEL-NEXT: v_cndmask_b32_e64 v10, -1, v10, s[4:5]
 ; GISEL-NEXT: v_and_b32_e32 v2, v8, v2
 ; GISEL-NEXT: v_and_b32_e32 v3, v9, v3
 ; GISEL-NEXT: v_and_or_b32 v0, v5, v0, v2
diff --git a/llvm/test/CodeGen/AMDGPU/short-select-cndmask.ll b/llvm/test/CodeGen/AMDGPU/short-select-cndmask.ll
index 2d6810a34afb2..8f9b56c42de64 100644
--- a/llvm/test/CodeGen/AMDGPU/short-select-cndmask.ll
+++ b/llvm/test/CodeGen/AMDGPU/short-select-cndmask.ll
@@ -5,11 +5,9 @@
 define amdgpu_cs void @test(i32 %a, i32 %x, i32 %y, i32 %p, i32 %q, i32 %r, i32 %s, ptr addrspace(1) %out) {
 ; GCN-LABEL: test:
 ; GCN: ; %bb.0: ; %.entry
-; GCN-NEXT: v_cmp_eq_u32_e32 vcc_lo, -1, v0
-; GCN-NEXT: v_cndmask_b32_e32 v0, v2, v1, vcc_lo
-; GCN-NEXT: v_cndmask_b32_e64 v1, v3, 0, vcc_lo
-; GCN-NEXT: v_cndmask_b32_e64 v2, v4, 0, vcc_lo
-; GCN-NEXT: v_cndmask_b32_e32 v3, v6, v5, vcc_lo
+; GCN-NEXT: v_cmp_ne_u32_e32 vcc_lo, -1, v0
+; GCN-NEXT: v_dual_cndmask_b32 v0, v1, v2 :: v_dual_cndmask_b32 v1, 0, v3
+; GCN-NEXT: v_dual_cndmask_b32 v2, 0, v4 :: v_dual_cndmask_b32 v3, v5, v6
 ; GCN-NEXT: global_store_b128 v[7:8], v[0:3], off
 ; GCN-NEXT: s_endpgm
 .entry:
@@ -29,11 +27,9 @@ define amdgpu_cs void @test(i32 %a, i32 %x, i32 %y, i32 %p, i32 %q, i32 %r, i32
 define amdgpu_cs void @test_negative_case(i32 %a, i32 %x, i32 %y, i32 %p, i32 %q, i32 %r, i32 %s, ptr addrspace(1) %out) {
 ; GCN-LABEL: test_negative_case:
 ; GCN: ; %bb.0: ; %.entry
-; GCN-NEXT: v_cmp_eq_u32_e32 vcc_lo, -1, v0
-; GCN-NEXT: v_cndmask_b32_e32 v0, v2, v1, vcc_lo
-; GCN-NEXT: v_cndmask_b32_e64 v1, v3, 0, vcc_lo
-; GCN-NEXT: v_cndmask_b32_e64 v2, v4, 0, vcc_lo
-; GCN-NEXT: v_cndmask_b32_e32 v3, v6, v5, vcc_lo
+; GCN-NEXT: v_cmp_ne_u32_e32 vcc_lo, -1, v0
+; GCN-NEXT: v_dual_cndmask_b32 v0, v1, v2 :: v_dual_cndmask_b32 v1, 0, v3
+; GCN-NEXT: v_dual_cndmask_b32 v2, 0, v4 :: v_dual_cndmask_b32 v3, v5, v6
 ; GCN-NEXT: global_store_b128 v[7:8], v[0:3], off
 ; GCN-NEXT: s_endpgm
 .entry:

From e7e34626dc8355b30a07ea2fc3c84d02d8f56bfb Mon Sep 17 00:00:00 2001
From: Ana Mihajlovic
Date: Wed, 9 Apr 2025 17:08:40 +0200
Subject: [PATCH 3/4] cover case for single v_cndmask to reduce code size

---
 llvm/lib/Target/AMDGPU/SIFoldOperands.cpp | 15 +-
 .../AMDGPU/GlobalISel/extractelement.ll | 204 +++++++++---------
 .../test/CodeGen/AMDGPU/GlobalISel/saddsat.ll | 137 ++++++------
 ...amdgpu-codegenprepare-fold-binop-select.ll | 8 +-
 llvm/test/CodeGen/AMDGPU/ctlz.ll | 20 +-
 llvm/test/CodeGen/AMDGPU/ctlz_zero_undef.ll | 22 +-
 llvm/test/CodeGen/AMDGPU/cttz.ll | 20 +-
 llvm/test/CodeGen/AMDGPU/cttz_zero_undef.ll | 12 +-
 llvm/test/CodeGen/AMDGPU/fneg-combines.new.ll | 3 +-
 .../CodeGen/AMDGPU/insert_vector_dynelt.ll | 5 +-
 .../CodeGen/AMDGPU/private-memory-atomics.ll | 6 +-
 11 files changed, 231 insertions(+), 221 deletions(-)

diff --git a/llvm/lib/Target/AMDGPU/SIFoldOperands.cpp b/llvm/lib/Target/AMDGPU/SIFoldOperands.cpp
index 4ad538e0b1e5f..1ec0ab9b5bd9c 100644
---
a/llvm/lib/Target/AMDGPU/SIFoldOperands.cpp +++ b/llvm/lib/Target/AMDGPU/SIFoldOperands.cpp @@ -119,6 +119,19 @@ class SIFoldOperandsImpl { return AMDGPU::V_CMP_LE_U32_e64; case AMDGPU::V_CMP_LT_U32_e64: return AMDGPU::V_CMP_GE_U32_e64; + + // case AMDGPU::V_CMP_EQ_U32_e64: + // return AMDGPU::V_CMP_NE_U32_e64; + // case AMDGPU::V_CMP_NE_U32_e64: + // return AMDGPU::V_CMP_EQ_U32_e64; + // case AMDGPU::V_CMP_GE_U32_e64: + // return AMDGPU::V_CMP_LT_U32_e64; + // case AMDGPU::V_CMP_LE_U32_e64: + // return AMDGPU::V_CMP_GT_U32_e64; + // case AMDGPU::V_CMP_GT_U32_e64: + // return AMDGPU::V_CMP_LE_U32_e64; + // case AMDGPU::V_CMP_LT_U32_e64: + // return AMDGPU::V_CMP_GE_U32_e64; default: return 0; } @@ -1504,7 +1517,7 @@ bool SIFoldOperandsImpl::shouldSwitchOperands(MachineRegisterInfo &MRI, if (src1Imm && !src0Imm) count++; } - return (count >= 2); + return (count >= 1); } // Try to fold an instruction into a simpler one diff --git a/llvm/test/CodeGen/AMDGPU/GlobalISel/extractelement.ll b/llvm/test/CodeGen/AMDGPU/GlobalISel/extractelement.ll index 31a229a908142..772b72ac5efa2 100644 --- a/llvm/test/CodeGen/AMDGPU/GlobalISel/extractelement.ll +++ b/llvm/test/CodeGen/AMDGPU/GlobalISel/extractelement.ll @@ -11,22 +11,22 @@ define float @dyn_extract_v8f32_const_s_v(i32 %sel) { ; GCN-NEXT: v_cmp_eq_u32_e32 vcc, 1, v0 ; GCN-NEXT: v_mov_b32_e32 v1, 0x40400000 ; GCN-NEXT: v_cndmask_b32_e64 v6, 1.0, 2.0, vcc -; GCN-NEXT: v_cmp_eq_u32_e32 vcc, 2, v0 -; GCN-NEXT: v_cndmask_b32_e32 v1, v6, v1, vcc -; GCN-NEXT: v_cmp_eq_u32_e32 vcc, 3, v0 +; GCN-NEXT: v_cmp_ne_u32_e32 vcc, 2, v0 +; GCN-NEXT: v_cndmask_b32_e32 v1, v1, v6, vcc +; GCN-NEXT: v_cmp_ne_u32_e32 vcc, 3, v0 ; GCN-NEXT: v_mov_b32_e32 v2, 0x40a00000 -; GCN-NEXT: v_cndmask_b32_e64 v1, v1, 4.0, vcc -; GCN-NEXT: v_cmp_eq_u32_e32 vcc, 4, v0 +; GCN-NEXT: v_cndmask_b32_e32 v1, 4.0, v1, vcc +; GCN-NEXT: v_cmp_ne_u32_e32 vcc, 4, v0 ; GCN-NEXT: v_mov_b32_e32 v3, 0x40c00000 -; GCN-NEXT: v_cndmask_b32_e32 v1, v1, v2, vcc -; GCN-NEXT: v_cmp_eq_u32_e32 vcc, 5, v0 +; GCN-NEXT: v_cndmask_b32_e32 v1, v2, v1, vcc +; GCN-NEXT: v_cmp_ne_u32_e32 vcc, 5, v0 ; GCN-NEXT: v_mov_b32_e32 v4, 0x40e00000 -; GCN-NEXT: v_cndmask_b32_e32 v1, v1, v3, vcc -; GCN-NEXT: v_cmp_eq_u32_e32 vcc, 6, v0 +; GCN-NEXT: v_cndmask_b32_e32 v1, v3, v1, vcc +; GCN-NEXT: v_cmp_ne_u32_e32 vcc, 6, v0 ; GCN-NEXT: v_mov_b32_e32 v5, 0x41000000 -; GCN-NEXT: v_cndmask_b32_e32 v1, v1, v4, vcc -; GCN-NEXT: v_cmp_eq_u32_e32 vcc, 7, v0 -; GCN-NEXT: v_cndmask_b32_e32 v0, v1, v5, vcc +; GCN-NEXT: v_cndmask_b32_e32 v1, v4, v1, vcc +; GCN-NEXT: v_cmp_ne_u32_e32 vcc, 7, v0 +; GCN-NEXT: v_cndmask_b32_e32 v0, v5, v1, vcc ; GCN-NEXT: s_setpc_b64 s[30:31] ; ; GFX10PLUS-LABEL: dyn_extract_v8f32_const_s_v: @@ -34,18 +34,18 @@ define float @dyn_extract_v8f32_const_s_v(i32 %sel) { ; GFX10PLUS-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) ; GFX10PLUS-NEXT: v_cmp_eq_u32_e32 vcc_lo, 1, v0 ; GFX10PLUS-NEXT: v_cndmask_b32_e64 v1, 1.0, 2.0, vcc_lo -; GFX10PLUS-NEXT: v_cmp_eq_u32_e32 vcc_lo, 2, v0 -; GFX10PLUS-NEXT: v_cndmask_b32_e64 v1, v1, 0x40400000, vcc_lo -; GFX10PLUS-NEXT: v_cmp_eq_u32_e32 vcc_lo, 3, v0 -; GFX10PLUS-NEXT: v_cndmask_b32_e64 v1, v1, 4.0, vcc_lo -; GFX10PLUS-NEXT: v_cmp_eq_u32_e32 vcc_lo, 4, v0 -; GFX10PLUS-NEXT: v_cndmask_b32_e64 v1, v1, 0x40a00000, vcc_lo -; GFX10PLUS-NEXT: v_cmp_eq_u32_e32 vcc_lo, 5, v0 -; GFX10PLUS-NEXT: v_cndmask_b32_e64 v1, v1, 0x40c00000, vcc_lo -; GFX10PLUS-NEXT: v_cmp_eq_u32_e32 vcc_lo, 6, v0 -; GFX10PLUS-NEXT: v_cndmask_b32_e64 v1, v1, 0x40e00000, vcc_lo -; GFX10PLUS-NEXT: 
v_cmp_eq_u32_e32 vcc_lo, 7, v0 -; GFX10PLUS-NEXT: v_cndmask_b32_e64 v0, v1, 0x41000000, vcc_lo +; GFX10PLUS-NEXT: v_cmp_ne_u32_e32 vcc_lo, 2, v0 +; GFX10PLUS-NEXT: v_cndmask_b32_e32 v1, 0x40400000, v1, vcc_lo +; GFX10PLUS-NEXT: v_cmp_ne_u32_e32 vcc_lo, 3, v0 +; GFX10PLUS-NEXT: v_cndmask_b32_e32 v1, 4.0, v1, vcc_lo +; GFX10PLUS-NEXT: v_cmp_ne_u32_e32 vcc_lo, 4, v0 +; GFX10PLUS-NEXT: v_cndmask_b32_e32 v1, 0x40a00000, v1, vcc_lo +; GFX10PLUS-NEXT: v_cmp_ne_u32_e32 vcc_lo, 5, v0 +; GFX10PLUS-NEXT: v_cndmask_b32_e32 v1, 0x40c00000, v1, vcc_lo +; GFX10PLUS-NEXT: v_cmp_ne_u32_e32 vcc_lo, 6, v0 +; GFX10PLUS-NEXT: v_cndmask_b32_e32 v1, 0x40e00000, v1, vcc_lo +; GFX10PLUS-NEXT: v_cmp_ne_u32_e32 vcc_lo, 7, v0 +; GFX10PLUS-NEXT: v_cndmask_b32_e32 v0, 0x41000000, v1, vcc_lo ; GFX10PLUS-NEXT: s_setpc_b64 s[30:31] entry: %ext = extractelement <8 x float> , i32 %sel @@ -3383,43 +3383,43 @@ define float @dyn_extract_v15f32_const_s_v(i32 %sel) { ; GCN-NEXT: v_cmp_eq_u32_e32 vcc, 1, v0 ; GCN-NEXT: v_mov_b32_e32 v1, 0x40400000 ; GCN-NEXT: v_cndmask_b32_e64 v13, 1.0, 2.0, vcc -; GCN-NEXT: v_cmp_eq_u32_e32 vcc, 2, v0 -; GCN-NEXT: v_cndmask_b32_e32 v1, v13, v1, vcc -; GCN-NEXT: v_cmp_eq_u32_e32 vcc, 3, v0 +; GCN-NEXT: v_cmp_ne_u32_e32 vcc, 2, v0 +; GCN-NEXT: v_cndmask_b32_e32 v1, v1, v13, vcc +; GCN-NEXT: v_cmp_ne_u32_e32 vcc, 3, v0 ; GCN-NEXT: v_mov_b32_e32 v2, 0x40a00000 -; GCN-NEXT: v_cndmask_b32_e64 v1, v1, 4.0, vcc -; GCN-NEXT: v_cmp_eq_u32_e32 vcc, 4, v0 +; GCN-NEXT: v_cndmask_b32_e32 v1, 4.0, v1, vcc +; GCN-NEXT: v_cmp_ne_u32_e32 vcc, 4, v0 ; GCN-NEXT: v_mov_b32_e32 v3, 0x40c00000 -; GCN-NEXT: v_cndmask_b32_e32 v1, v1, v2, vcc -; GCN-NEXT: v_cmp_eq_u32_e32 vcc, 5, v0 +; GCN-NEXT: v_cndmask_b32_e32 v1, v2, v1, vcc +; GCN-NEXT: v_cmp_ne_u32_e32 vcc, 5, v0 ; GCN-NEXT: v_mov_b32_e32 v4, 0x40e00000 -; GCN-NEXT: v_cndmask_b32_e32 v1, v1, v3, vcc -; GCN-NEXT: v_cmp_eq_u32_e32 vcc, 6, v0 +; GCN-NEXT: v_cndmask_b32_e32 v1, v3, v1, vcc +; GCN-NEXT: v_cmp_ne_u32_e32 vcc, 6, v0 ; GCN-NEXT: v_mov_b32_e32 v5, 0x41000000 -; GCN-NEXT: v_cndmask_b32_e32 v1, v1, v4, vcc -; GCN-NEXT: v_cmp_eq_u32_e32 vcc, 7, v0 +; GCN-NEXT: v_cndmask_b32_e32 v1, v4, v1, vcc +; GCN-NEXT: v_cmp_ne_u32_e32 vcc, 7, v0 ; GCN-NEXT: v_mov_b32_e32 v6, 0x41100000 -; GCN-NEXT: v_cndmask_b32_e32 v1, v1, v5, vcc -; GCN-NEXT: v_cmp_eq_u32_e32 vcc, 8, v0 +; GCN-NEXT: v_cndmask_b32_e32 v1, v5, v1, vcc +; GCN-NEXT: v_cmp_ne_u32_e32 vcc, 8, v0 ; GCN-NEXT: v_mov_b32_e32 v7, 0x41200000 -; GCN-NEXT: v_cndmask_b32_e32 v1, v1, v6, vcc -; GCN-NEXT: v_cmp_eq_u32_e32 vcc, 9, v0 +; GCN-NEXT: v_cndmask_b32_e32 v1, v6, v1, vcc +; GCN-NEXT: v_cmp_ne_u32_e32 vcc, 9, v0 ; GCN-NEXT: v_mov_b32_e32 v8, 0x41300000 -; GCN-NEXT: v_cndmask_b32_e32 v1, v1, v7, vcc -; GCN-NEXT: v_cmp_eq_u32_e32 vcc, 10, v0 +; GCN-NEXT: v_cndmask_b32_e32 v1, v7, v1, vcc +; GCN-NEXT: v_cmp_ne_u32_e32 vcc, 10, v0 ; GCN-NEXT: v_mov_b32_e32 v9, 0x41400000 -; GCN-NEXT: v_cndmask_b32_e32 v1, v1, v8, vcc -; GCN-NEXT: v_cmp_eq_u32_e32 vcc, 11, v0 +; GCN-NEXT: v_cndmask_b32_e32 v1, v8, v1, vcc +; GCN-NEXT: v_cmp_ne_u32_e32 vcc, 11, v0 ; GCN-NEXT: v_mov_b32_e32 v10, 0x41500000 -; GCN-NEXT: v_cndmask_b32_e32 v1, v1, v9, vcc -; GCN-NEXT: v_cmp_eq_u32_e32 vcc, 12, v0 +; GCN-NEXT: v_cndmask_b32_e32 v1, v9, v1, vcc +; GCN-NEXT: v_cmp_ne_u32_e32 vcc, 12, v0 ; GCN-NEXT: v_mov_b32_e32 v11, 0x41600000 -; GCN-NEXT: v_cndmask_b32_e32 v1, v1, v10, vcc -; GCN-NEXT: v_cmp_eq_u32_e32 vcc, 13, v0 +; GCN-NEXT: v_cndmask_b32_e32 v1, v10, v1, vcc +; GCN-NEXT: v_cmp_ne_u32_e32 vcc, 13, v0 ; GCN-NEXT: v_mov_b32_e32 v12, 
0x41700000 -; GCN-NEXT: v_cndmask_b32_e32 v1, v1, v11, vcc -; GCN-NEXT: v_cmp_eq_u32_e32 vcc, 14, v0 -; GCN-NEXT: v_cndmask_b32_e32 v1, v1, v12, vcc +; GCN-NEXT: v_cndmask_b32_e32 v1, v11, v1, vcc +; GCN-NEXT: v_cmp_ne_u32_e32 vcc, 14, v0 +; GCN-NEXT: v_cndmask_b32_e32 v1, v12, v1, vcc ; GCN-NEXT: v_cmp_eq_u32_e32 vcc, 15, v0 ; GCN-NEXT: v_cndmask_b32_e32 v0, v1, v0, vcc ; GCN-NEXT: s_setpc_b64 s[30:31] @@ -3429,32 +3429,32 @@ define float @dyn_extract_v15f32_const_s_v(i32 %sel) { ; GFX10-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) ; GFX10-NEXT: v_cmp_eq_u32_e32 vcc_lo, 1, v0 ; GFX10-NEXT: v_cndmask_b32_e64 v1, 1.0, 2.0, vcc_lo -; GFX10-NEXT: v_cmp_eq_u32_e32 vcc_lo, 2, v0 -; GFX10-NEXT: v_cndmask_b32_e64 v1, v1, 0x40400000, vcc_lo -; GFX10-NEXT: v_cmp_eq_u32_e32 vcc_lo, 3, v0 -; GFX10-NEXT: v_cndmask_b32_e64 v1, v1, 4.0, vcc_lo -; GFX10-NEXT: v_cmp_eq_u32_e32 vcc_lo, 4, v0 -; GFX10-NEXT: v_cndmask_b32_e64 v1, v1, 0x40a00000, vcc_lo -; GFX10-NEXT: v_cmp_eq_u32_e32 vcc_lo, 5, v0 -; GFX10-NEXT: v_cndmask_b32_e64 v1, v1, 0x40c00000, vcc_lo -; GFX10-NEXT: v_cmp_eq_u32_e32 vcc_lo, 6, v0 -; GFX10-NEXT: v_cndmask_b32_e64 v1, v1, 0x40e00000, vcc_lo -; GFX10-NEXT: v_cmp_eq_u32_e32 vcc_lo, 7, v0 -; GFX10-NEXT: v_cndmask_b32_e64 v1, v1, 0x41000000, vcc_lo -; GFX10-NEXT: v_cmp_eq_u32_e32 vcc_lo, 8, v0 -; GFX10-NEXT: v_cndmask_b32_e64 v1, v1, 0x41100000, vcc_lo -; GFX10-NEXT: v_cmp_eq_u32_e32 vcc_lo, 9, v0 -; GFX10-NEXT: v_cndmask_b32_e64 v1, v1, 0x41200000, vcc_lo -; GFX10-NEXT: v_cmp_eq_u32_e32 vcc_lo, 10, v0 -; GFX10-NEXT: v_cndmask_b32_e64 v1, v1, 0x41300000, vcc_lo -; GFX10-NEXT: v_cmp_eq_u32_e32 vcc_lo, 11, v0 -; GFX10-NEXT: v_cndmask_b32_e64 v1, v1, 0x41400000, vcc_lo -; GFX10-NEXT: v_cmp_eq_u32_e32 vcc_lo, 12, v0 -; GFX10-NEXT: v_cndmask_b32_e64 v1, v1, 0x41500000, vcc_lo -; GFX10-NEXT: v_cmp_eq_u32_e32 vcc_lo, 13, v0 -; GFX10-NEXT: v_cndmask_b32_e64 v1, v1, 0x41600000, vcc_lo -; GFX10-NEXT: v_cmp_eq_u32_e32 vcc_lo, 14, v0 -; GFX10-NEXT: v_cndmask_b32_e64 v1, v1, 0x41700000, vcc_lo +; GFX10-NEXT: v_cmp_ne_u32_e32 vcc_lo, 2, v0 +; GFX10-NEXT: v_cndmask_b32_e32 v1, 0x40400000, v1, vcc_lo +; GFX10-NEXT: v_cmp_ne_u32_e32 vcc_lo, 3, v0 +; GFX10-NEXT: v_cndmask_b32_e32 v1, 4.0, v1, vcc_lo +; GFX10-NEXT: v_cmp_ne_u32_e32 vcc_lo, 4, v0 +; GFX10-NEXT: v_cndmask_b32_e32 v1, 0x40a00000, v1, vcc_lo +; GFX10-NEXT: v_cmp_ne_u32_e32 vcc_lo, 5, v0 +; GFX10-NEXT: v_cndmask_b32_e32 v1, 0x40c00000, v1, vcc_lo +; GFX10-NEXT: v_cmp_ne_u32_e32 vcc_lo, 6, v0 +; GFX10-NEXT: v_cndmask_b32_e32 v1, 0x40e00000, v1, vcc_lo +; GFX10-NEXT: v_cmp_ne_u32_e32 vcc_lo, 7, v0 +; GFX10-NEXT: v_cndmask_b32_e32 v1, 0x41000000, v1, vcc_lo +; GFX10-NEXT: v_cmp_ne_u32_e32 vcc_lo, 8, v0 +; GFX10-NEXT: v_cndmask_b32_e32 v1, 0x41100000, v1, vcc_lo +; GFX10-NEXT: v_cmp_ne_u32_e32 vcc_lo, 9, v0 +; GFX10-NEXT: v_cndmask_b32_e32 v1, 0x41200000, v1, vcc_lo +; GFX10-NEXT: v_cmp_ne_u32_e32 vcc_lo, 10, v0 +; GFX10-NEXT: v_cndmask_b32_e32 v1, 0x41300000, v1, vcc_lo +; GFX10-NEXT: v_cmp_ne_u32_e32 vcc_lo, 11, v0 +; GFX10-NEXT: v_cndmask_b32_e32 v1, 0x41400000, v1, vcc_lo +; GFX10-NEXT: v_cmp_ne_u32_e32 vcc_lo, 12, v0 +; GFX10-NEXT: v_cndmask_b32_e32 v1, 0x41500000, v1, vcc_lo +; GFX10-NEXT: v_cmp_ne_u32_e32 vcc_lo, 13, v0 +; GFX10-NEXT: v_cndmask_b32_e32 v1, 0x41600000, v1, vcc_lo +; GFX10-NEXT: v_cmp_ne_u32_e32 vcc_lo, 14, v0 +; GFX10-NEXT: v_cndmask_b32_e32 v1, 0x41700000, v1, vcc_lo ; GFX10-NEXT: v_cmp_eq_u32_e32 vcc_lo, 15, v0 ; GFX10-NEXT: v_cndmask_b32_e64 v0, v1, s4, vcc_lo ; GFX10-NEXT: s_setpc_b64 s[30:31] @@ -3464,32 +3464,32 @@ define 
float @dyn_extract_v15f32_const_s_v(i32 %sel) {
 ; GFX11-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
 ; GFX11-NEXT: v_cmp_eq_u32_e32 vcc_lo, 1, v0
 ; GFX11-NEXT: v_cndmask_b32_e64 v1, 1.0, 2.0, vcc_lo
-; GFX11-NEXT: v_cmp_eq_u32_e32 vcc_lo, 2, v0
-; GFX11-NEXT: v_cndmask_b32_e64 v1, v1, 0x40400000, vcc_lo
-; GFX11-NEXT: v_cmp_eq_u32_e32 vcc_lo, 3, v0
-; GFX11-NEXT: v_cndmask_b32_e64 v1, v1, 4.0, vcc_lo
-; GFX11-NEXT: v_cmp_eq_u32_e32 vcc_lo, 4, v0
-; GFX11-NEXT: v_cndmask_b32_e64 v1, v1, 0x40a00000, vcc_lo
-; GFX11-NEXT: v_cmp_eq_u32_e32 vcc_lo, 5, v0
-; GFX11-NEXT: v_cndmask_b32_e64 v1, v1, 0x40c00000, vcc_lo
-; GFX11-NEXT: v_cmp_eq_u32_e32 vcc_lo, 6, v0
-; GFX11-NEXT: v_cndmask_b32_e64 v1, v1, 0x40e00000, vcc_lo
-; GFX11-NEXT: v_cmp_eq_u32_e32 vcc_lo, 7, v0
-; GFX11-NEXT: v_cndmask_b32_e64 v1, v1, 0x41000000, vcc_lo
-; GFX11-NEXT: v_cmp_eq_u32_e32 vcc_lo, 8, v0
-; GFX11-NEXT: v_cndmask_b32_e64 v1, v1, 0x41100000, vcc_lo
-; GFX11-NEXT: v_cmp_eq_u32_e32 vcc_lo, 9, v0
-; GFX11-NEXT: v_cndmask_b32_e64 v1, v1, 0x41200000, vcc_lo
-; GFX11-NEXT: v_cmp_eq_u32_e32 vcc_lo, 10, v0
-; GFX11-NEXT: v_cndmask_b32_e64 v1, v1, 0x41300000, vcc_lo
-; GFX11-NEXT: v_cmp_eq_u32_e32 vcc_lo, 11, v0
-; GFX11-NEXT: v_cndmask_b32_e64 v1, v1, 0x41400000, vcc_lo
-; GFX11-NEXT: v_cmp_eq_u32_e32 vcc_lo, 12, v0
-; GFX11-NEXT: v_cndmask_b32_e64 v1, v1, 0x41500000, vcc_lo
-; GFX11-NEXT: v_cmp_eq_u32_e32 vcc_lo, 13, v0
-; GFX11-NEXT: v_cndmask_b32_e64 v1, v1, 0x41600000, vcc_lo
-; GFX11-NEXT: v_cmp_eq_u32_e32 vcc_lo, 14, v0
-; GFX11-NEXT: v_cndmask_b32_e64 v1, v1, 0x41700000, vcc_lo
+; GFX11-NEXT: v_cmp_ne_u32_e32 vcc_lo, 2, v0
+; GFX11-NEXT: v_cndmask_b32_e32 v1, 0x40400000, v1, vcc_lo
+; GFX11-NEXT: v_cmp_ne_u32_e32 vcc_lo, 3, v0
+; GFX11-NEXT: v_cndmask_b32_e32 v1, 4.0, v1, vcc_lo
+; GFX11-NEXT: v_cmp_ne_u32_e32 vcc_lo, 4, v0
+; GFX11-NEXT: v_cndmask_b32_e32 v1, 0x40a00000, v1, vcc_lo
+; GFX11-NEXT: v_cmp_ne_u32_e32 vcc_lo, 5, v0
+; GFX11-NEXT: v_cndmask_b32_e32 v1, 0x40c00000, v1, vcc_lo
+; GFX11-NEXT: v_cmp_ne_u32_e32 vcc_lo, 6, v0
+; GFX11-NEXT: v_cndmask_b32_e32 v1, 0x40e00000, v1, vcc_lo
+; GFX11-NEXT: v_cmp_ne_u32_e32 vcc_lo, 7, v0
+; GFX11-NEXT: v_cndmask_b32_e32 v1, 0x41000000, v1, vcc_lo
+; GFX11-NEXT: v_cmp_ne_u32_e32 vcc_lo, 8, v0
+; GFX11-NEXT: v_cndmask_b32_e32 v1, 0x41100000, v1, vcc_lo
+; GFX11-NEXT: v_cmp_ne_u32_e32 vcc_lo, 9, v0
+; GFX11-NEXT: v_cndmask_b32_e32 v1, 0x41200000, v1, vcc_lo
+; GFX11-NEXT: v_cmp_ne_u32_e32 vcc_lo, 10, v0
+; GFX11-NEXT: v_cndmask_b32_e32 v1, 0x41300000, v1, vcc_lo
+; GFX11-NEXT: v_cmp_ne_u32_e32 vcc_lo, 11, v0
+; GFX11-NEXT: v_cndmask_b32_e32 v1, 0x41400000, v1, vcc_lo
+; GFX11-NEXT: v_cmp_ne_u32_e32 vcc_lo, 12, v0
+; GFX11-NEXT: v_cndmask_b32_e32 v1, 0x41500000, v1, vcc_lo
+; GFX11-NEXT: v_cmp_ne_u32_e32 vcc_lo, 13, v0
+; GFX11-NEXT: v_cndmask_b32_e32 v1, 0x41600000, v1, vcc_lo
+; GFX11-NEXT: v_cmp_ne_u32_e32 vcc_lo, 14, v0
+; GFX11-NEXT: v_cndmask_b32_e32 v1, 0x41700000, v1, vcc_lo
 ; GFX11-NEXT: v_cmp_eq_u32_e32 vcc_lo, 15, v0
 ; GFX11-NEXT: v_cndmask_b32_e64 v0, v1, s0, vcc_lo
 ; GFX11-NEXT: s_setpc_b64 s[30:31]
diff --git a/llvm/test/CodeGen/AMDGPU/GlobalISel/saddsat.ll b/llvm/test/CodeGen/AMDGPU/GlobalISel/saddsat.ll
index 723ad5646c0a3..03b713f6866a0 100644
--- a/llvm/test/CodeGen/AMDGPU/GlobalISel/saddsat.ll
+++ b/llvm/test/CodeGen/AMDGPU/GlobalISel/saddsat.ll
@@ -5155,8 +5155,8 @@ define amdgpu_ps i128 @s_saddsat_i128(i128 inreg %lhs, i128 inreg %rhs) {
 ; GFX8-NEXT: v_cndmask_b32_e32 v0, v1, v0, vcc
 ; GFX8-NEXT: v_cndmask_b32_e64 v1, 0, 1, s[0:1]
 ; GFX8-NEXT: s_and_b32 s0, 1, s2
-; GFX8-NEXT: v_cmp_ne_u32_e64 s[0:1], 0, s0
-; GFX8-NEXT: v_cndmask_b32_e64 v1, v1, 0, s[0:1]
+; GFX8-NEXT: v_cmp_eq_u32_e64 vcc, 0, s0
+; GFX8-NEXT: v_cndmask_b32_e32 v1, 0, v1, vcc
 ; GFX8-NEXT: v_xor_b32_e32 v0, v1, v0
 ; GFX8-NEXT: s_ashr_i32 s0, s9, 31
 ; GFX8-NEXT: v_and_b32_e32 v0, 1, v0
@@ -5202,8 +5202,8 @@ define amdgpu_ps i128 @s_saddsat_i128(i128 inreg %lhs, i128 inreg %rhs) {
 ; GFX9-NEXT: v_cndmask_b32_e32 v0, v1, v0, vcc
 ; GFX9-NEXT: v_cndmask_b32_e64 v1, 0, 1, s[0:1]
 ; GFX9-NEXT: s_and_b32 s0, 1, s2
-; GFX9-NEXT: v_cmp_ne_u32_e64 s[0:1], 0, s0
-; GFX9-NEXT: v_cndmask_b32_e64 v1, v1, 0, s[0:1]
+; GFX9-NEXT: v_cmp_eq_u32_e64 vcc, 0, s0
+; GFX9-NEXT: v_cndmask_b32_e32 v1, 0, v1, vcc
 ; GFX9-NEXT: v_xor_b32_e32 v0, v1, v0
 ; GFX9-NEXT: s_ashr_i32 s0, s9, 31
 ; GFX9-NEXT: v_and_b32_e32 v0, 1, v0
@@ -5241,16 +5241,16 @@ define amdgpu_ps i128 @s_saddsat_i128(i128 inreg %lhs, i128 inreg %rhs) {
 ; GFX10-NEXT: v_cndmask_b32_e64 v1, 0, 1, s0
 ; GFX10-NEXT: s_and_b32 s0, 1, s10
 ; GFX10-NEXT: s_cmp_eq_u64 s[6:7], 0
-; GFX10-NEXT: v_cndmask_b32_e64 v2, 0, 1, s2
-; GFX10-NEXT: s_cselect_b32 s1, 1, 0
 ; GFX10-NEXT: v_cmp_ne_u32_e64 vcc_lo, 0, s0
+; GFX10-NEXT: s_cselect_b32 s1, 1, 0
+; GFX10-NEXT: v_cndmask_b32_e64 v2, 0, 1, s2
 ; GFX10-NEXT: s_and_b32 s1, 1, s1
-; GFX10-NEXT: v_cmp_ne_u32_e64 s0, 0, s1
-; GFX10-NEXT: v_cndmask_b32_e32 v0, v1, v0, vcc_lo
-; GFX10-NEXT: v_cndmask_b32_e64 v1, v2, 0, s0
-; GFX10-NEXT: v_mov_b32_e32 v2, s5
 ; GFX10-NEXT: s_ashr_i32 s0, s9, 31
+; GFX10-NEXT: v_cndmask_b32_e32 v0, v1, v0, vcc_lo
+; GFX10-NEXT: v_cmp_eq_u32_e64 vcc_lo, 0, s1
 ; GFX10-NEXT: s_add_i32 s1, s0, 0x80000000
+; GFX10-NEXT: v_cndmask_b32_e32 v1, 0, v2, vcc_lo
+; GFX10-NEXT: v_mov_b32_e32 v2, s5
 ; GFX10-NEXT: v_xor_b32_e32 v0, v1, v0
 ; GFX10-NEXT: v_mov_b32_e32 v1, s4
 ; GFX10-NEXT: v_and_b32_e32 v0, 1, v0
@@ -5282,16 +5282,15 @@ define amdgpu_ps i128 @s_saddsat_i128(i128 inreg %lhs, i128 inreg %rhs) {
 ; GFX11-NEXT: v_cndmask_b32_e64 v1, 0, 1, s0
 ; GFX11-NEXT: s_and_b32 s0, 1, s10
 ; GFX11-NEXT: s_cmp_eq_u64 s[6:7], 0
-; GFX11-NEXT: v_cndmask_b32_e64 v2, 0, 1, s2
-; GFX11-NEXT: s_cselect_b32 s1, 1, 0
 ; GFX11-NEXT: v_cmp_ne_u32_e64 vcc_lo, 0, s0
+; GFX11-NEXT: s_cselect_b32 s1, 1, 0
+; GFX11-NEXT: v_cndmask_b32_e64 v2, 0, 1, s2
 ; GFX11-NEXT: s_and_b32 s1, 1, s1
-; GFX11-NEXT: v_cmp_ne_u32_e64 s0, 0, s1
-; GFX11-NEXT: v_cndmask_b32_e32 v0, v1, v0, vcc_lo
-; GFX11-NEXT: v_cndmask_b32_e64 v1, v2, 0, s0
-; GFX11-NEXT: v_mov_b32_e32 v2, s5
 ; GFX11-NEXT: s_ashr_i32 s0, s9, 31
+; GFX11-NEXT: v_cndmask_b32_e32 v0, v1, v0, vcc_lo
+; GFX11-NEXT: v_cmp_eq_u32_e64 vcc_lo, 0, s1
 ; GFX11-NEXT: s_add_i32 s1, s0, 0x80000000
+; GFX11-NEXT: v_dual_cndmask_b32 v1, 0, v2 :: v_dual_mov_b32 v2, s5
 ; GFX11-NEXT: v_xor_b32_e32 v0, v1, v0
 ; GFX11-NEXT: v_dual_mov_b32 v1, s4 :: v_dual_and_b32 v0, 1, v0
 ; GFX11-NEXT: v_cmp_ne_u32_e32 vcc_lo, 0, v0
@@ -5511,8 +5510,8 @@ define amdgpu_ps <4 x float> @saddsat_i128_vs(i128 %lhs, i128 inreg %rhs) {
 ; GFX8-NEXT: v_cndmask_b32_e32 v0, v1, v0, vcc
 ; GFX8-NEXT: v_cndmask_b32_e64 v1, 0, 1, s[0:1]
 ; GFX8-NEXT: s_and_b32 s0, 1, s4
-; GFX8-NEXT: v_cmp_ne_u32_e64 s[0:1], 0, s0
-; GFX8-NEXT: v_cndmask_b32_e64 v1, v1, 0, s[0:1]
+; GFX8-NEXT: v_cmp_eq_u32_e64 vcc, 0, s0
+; GFX8-NEXT: v_cndmask_b32_e32 v1, 0, v1, vcc
 ; GFX8-NEXT: v_xor_b32_e32 v0, v1, v0
 ; GFX8-NEXT: v_ashrrev_i32_e32 v2, 31, v7
 ; GFX8-NEXT: v_bfrev_b32_e32 v1, 1
@@ -5545,8 +5544,8 @@ define amdgpu_ps <4 x float> @saddsat_i128_vs(i128 %lhs, i128 inreg %rhs) {
 ; GFX9-NEXT: v_cndmask_b32_e32 v0, v1, v0, vcc
 ; GFX9-NEXT: v_cndmask_b32_e64 v1, 0, 1, s[0:1]
 ; GFX9-NEXT: s_and_b32 s0, 1, s4
-; GFX9-NEXT: v_cmp_ne_u32_e64 s[0:1], 0, s0
-; GFX9-NEXT: v_cndmask_b32_e64 v1, v1, 0, s[0:1]
+; GFX9-NEXT: v_cmp_eq_u32_e64 vcc, 0, s0
+; GFX9-NEXT: v_cndmask_b32_e32 v1, 0, v1, vcc
 ; GFX9-NEXT: v_xor_b32_e32 v0, v1, v0
 ; GFX9-NEXT: v_ashrrev_i32_e32 v2, 31, v7
 ; GFX9-NEXT: v_and_b32_e32 v0, 1, v0
@@ -5572,13 +5571,13 @@ define amdgpu_ps <4 x float> @saddsat_i128_vs(i128 %lhs, i128 inreg %rhs) {
 ; GFX10-NEXT: v_cndmask_b32_e64 v0, 0, 1, vcc_lo
 ; GFX10-NEXT: v_cmp_lt_i64_e32 vcc_lo, v[6:7], v[2:3]
 ; GFX10-NEXT: v_cndmask_b32_e64 v8, 0, 1, s1
-; GFX10-NEXT: v_cmp_ne_u32_e64 s0, 0, s0
 ; GFX10-NEXT: v_cndmask_b32_e64 v1, 0, 1, vcc_lo
 ; GFX10-NEXT: v_cmp_eq_u64_e32 vcc_lo, v[6:7], v[2:3]
 ; GFX10-NEXT: v_ashrrev_i32_e32 v2, 31, v7
 ; GFX10-NEXT: v_add_nc_u32_e32 v3, 0x80000000, v2
 ; GFX10-NEXT: v_cndmask_b32_e32 v0, v1, v0, vcc_lo
-; GFX10-NEXT: v_cndmask_b32_e64 v1, v8, 0, s0
+; GFX10-NEXT: v_cmp_eq_u32_e64 vcc_lo, 0, s0
+; GFX10-NEXT: v_cndmask_b32_e32 v1, 0, v8, vcc_lo
 ; GFX10-NEXT: v_xor_b32_e32 v0, v1, v0
 ; GFX10-NEXT: v_and_b32_e32 v0, 1, v0
 ; GFX10-NEXT: v_cmp_ne_u32_e32 vcc_lo, 0, v0
@@ -5602,18 +5601,18 @@ define amdgpu_ps <4 x float> @saddsat_i128_vs(i128 %lhs, i128 inreg %rhs) {
 ; GFX11-NEXT: v_cndmask_b32_e64 v0, 0, 1, vcc_lo
 ; GFX11-NEXT: v_cmp_lt_i64_e32 vcc_lo, v[6:7], v[2:3]
 ; GFX11-NEXT: v_cndmask_b32_e64 v8, 0, 1, s1
-; GFX11-NEXT: v_cmp_ne_u32_e64 s0, 0, s0
 ; GFX11-NEXT: v_cndmask_b32_e64 v1, 0, 1, vcc_lo
 ; GFX11-NEXT: v_cmp_eq_u64_e32 vcc_lo, v[6:7], v[2:3]
 ; GFX11-NEXT: v_ashrrev_i32_e32 v2, 31, v7
 ; GFX11-NEXT: v_dual_cndmask_b32 v0, v1, v0 :: v_dual_add_nc_u32 v3, 0x80000000, v2
-; GFX11-NEXT: v_cndmask_b32_e64 v1, v8, 0, s0
+; GFX11-NEXT: v_cmp_eq_u32_e64 vcc_lo, 0, s0
+; GFX11-NEXT: v_cndmask_b32_e32 v1, 0, v8, vcc_lo
 ; GFX11-NEXT: v_xor_b32_e32 v0, v1, v0
 ; GFX11-NEXT: v_and_b32_e32 v0, 1, v0
 ; GFX11-NEXT: v_cmp_ne_u32_e32 vcc_lo, 0, v0
 ; GFX11-NEXT: v_cndmask_b32_e32 v1, v5, v2, vcc_lo
-; GFX11-NEXT: v_dual_cndmask_b32 v0, v4, v2 :: v_dual_cndmask_b32 v3, v7, v3
-; GFX11-NEXT: v_cndmask_b32_e32 v2, v6, v2, vcc_lo
+; GFX11-NEXT: v_cndmask_b32_e32 v0, v4, v2, vcc_lo
+; GFX11-NEXT: v_dual_cndmask_b32 v2, v6, v2 :: v_dual_cndmask_b32 v3, v7, v3
 ; GFX11-NEXT: ; return to shader part epilog
 %result = call i128 @llvm.sadd.sat.i128(i128 %lhs, i128 %rhs)
 %cast = bitcast i128 %result to <4 x float>
@@ -5982,8 +5981,8 @@ define amdgpu_ps <2 x i128> @s_saddsat_v2i128(<2 x i128> inreg %lhs, <2 x i128>
 ; GFX8-NEXT: v_cndmask_b32_e32 v0, v1, v0, vcc
 ; GFX8-NEXT: v_cndmask_b32_e64 v1, 0, 1, s[0:1]
 ; GFX8-NEXT: s_and_b32 s0, 1, s2
-; GFX8-NEXT: v_cmp_ne_u32_e64 s[0:1], 0, s0
-; GFX8-NEXT: v_cndmask_b32_e64 v1, v1, 0, s[0:1]
+; GFX8-NEXT: v_cmp_eq_u32_e64 vcc, 0, s0
+; GFX8-NEXT: v_cndmask_b32_e32 v1, 0, v1, vcc
 ; GFX8-NEXT: v_xor_b32_e32 v0, v1, v0
 ; GFX8-NEXT: s_ashr_i32 s0, s17, 31
 ; GFX8-NEXT: v_and_b32_e32 v0, 1, v0
@@ -6021,8 +6020,8 @@ define amdgpu_ps <2 x i128> @s_saddsat_v2i128(<2 x i128> inreg %lhs, <2 x i128>
 ; GFX8-NEXT: v_cndmask_b32_e32 v0, v1, v0, vcc
 ; GFX8-NEXT: v_cndmask_b32_e64 v1, 0, 1, s[4:5]
 ; GFX8-NEXT: s_and_b32 s4, 1, s6
-; GFX8-NEXT: v_cmp_ne_u32_e64 s[4:5], 0, s4
-; GFX8-NEXT: v_cndmask_b32_e64 v1, v1, 0, s[4:5]
+; GFX8-NEXT: v_cmp_eq_u32_e64 vcc, 0, s4
+; GFX8-NEXT: v_cndmask_b32_e32 v1, 0, v1, vcc
 ; GFX8-NEXT: v_xor_b32_e32 v0, v1, v0
 ; GFX8-NEXT: s_ashr_i32 s4, s3, 31
 ; GFX8-NEXT: v_and_b32_e32 v0, 1, v0
@@ -6072,8 +6071,8 @@ define amdgpu_ps <2 x i128> @s_saddsat_v2i128(<2 x i128> inreg %lhs, <2 x i128>
 ; GFX9-NEXT: v_cndmask_b32_e32 v0, v1, v0, vcc
 ; GFX9-NEXT: v_cndmask_b32_e64 v1, 0, 1, s[0:1]
 ; GFX9-NEXT: s_and_b32 s0, 1, s2
-; GFX9-NEXT: v_cmp_ne_u32_e64 s[0:1], 0, s0
-; GFX9-NEXT: v_cndmask_b32_e64 v1, v1, 0, s[0:1]
+; GFX9-NEXT: v_cmp_eq_u32_e64 vcc, 0, s0
+; GFX9-NEXT: v_cndmask_b32_e32 v1, 0, v1, vcc
 ; GFX9-NEXT: v_xor_b32_e32 v0, v1, v0
 ; GFX9-NEXT: s_ashr_i32 s0, s17, 31
 ; GFX9-NEXT: v_and_b32_e32 v0, 1, v0
@@ -6111,8 +6110,8 @@ define amdgpu_ps <2 x i128> @s_saddsat_v2i128(<2 x i128> inreg %lhs, <2 x i128>
 ; GFX9-NEXT: v_cndmask_b32_e32 v0, v1, v0, vcc
 ; GFX9-NEXT: v_cndmask_b32_e64 v1, 0, 1, s[4:5]
 ; GFX9-NEXT: s_and_b32 s4, 1, s6
-; GFX9-NEXT: v_cmp_ne_u32_e64 s[4:5], 0, s4
-; GFX9-NEXT: v_cndmask_b32_e64 v1, v1, 0, s[4:5]
+; GFX9-NEXT: v_cmp_eq_u32_e64 vcc, 0, s4
+; GFX9-NEXT: v_cndmask_b32_e32 v1, 0, v1, vcc
 ; GFX9-NEXT: v_xor_b32_e32 v0, v1, v0
 ; GFX9-NEXT: s_ashr_i32 s4, s3, 31
 ; GFX9-NEXT: v_and_b32_e32 v0, 1, v0
@@ -6154,53 +6153,53 @@ define amdgpu_ps <2 x i128> @s_saddsat_v2i128(<2 x i128> inreg %lhs, <2 x i128>
 ; GFX10-NEXT: v_cndmask_b32_e64 v1, 0, 1, s0
 ; GFX10-NEXT: s_and_b32 s0, 1, s18
 ; GFX10-NEXT: s_cmp_eq_u64 s[10:11], 0
-; GFX10-NEXT: v_cndmask_b32_e64 v2, 0, 1, s2
-; GFX10-NEXT: s_cselect_b32 s1, 1, 0
 ; GFX10-NEXT: v_cmp_ne_u32_e64 vcc_lo, 0, s0
-; GFX10-NEXT: s_and_b32 s1, 1, s1
+; GFX10-NEXT: s_cselect_b32 s1, 1, 0
 ; GFX10-NEXT: s_ashr_i32 s10, s17, 31
-; GFX10-NEXT: v_cmp_ne_u32_e64 s0, 0, s1
-; GFX10-NEXT: s_add_i32 s11, s10, 0x80000000
+; GFX10-NEXT: s_and_b32 s1, 1, s1
+; GFX10-NEXT: v_cndmask_b32_e64 v2, 0, 1, s2
 ; GFX10-NEXT: v_cndmask_b32_e32 v0, v1, v0, vcc_lo
-; GFX10-NEXT: v_cndmask_b32_e64 v1, v2, 0, s0
+; GFX10-NEXT: v_cmp_eq_u32_e64 vcc_lo, 0, s1
+; GFX10-NEXT: s_add_i32 s11, s10, 0x80000000
 ; GFX10-NEXT: s_add_u32 s0, s4, s12
 ; GFX10-NEXT: s_addc_u32 s1, s5, s13
 ; GFX10-NEXT: s_addc_u32 s2, s6, s14
 ; GFX10-NEXT: v_cmp_lt_u64_e64 s4, s[0:1], s[4:5]
+; GFX10-NEXT: v_cndmask_b32_e32 v1, 0, v2, vcc_lo
 ; GFX10-NEXT: s_addc_u32 s3, s7, s15
-; GFX10-NEXT: v_xor_b32_e32 v0, v1, v0
-; GFX10-NEXT: s_cmp_eq_u64 s[2:3], s[6:7]
 ; GFX10-NEXT: v_mov_b32_e32 v5, s0
-; GFX10-NEXT: s_cselect_b32 s12, 1, 0
+; GFX10-NEXT: s_cmp_eq_u64 s[2:3], s[6:7]
+; GFX10-NEXT: v_mov_b32_e32 v6, s1
+; GFX10-NEXT: v_xor_b32_e32 v0, v1, v0
 ; GFX10-NEXT: v_cndmask_b32_e64 v1, 0, 1, s4
 ; GFX10-NEXT: v_cmp_lt_i64_e64 s4, s[2:3], s[6:7]
+; GFX10-NEXT: s_cselect_b32 s12, 1, 0
 ; GFX10-NEXT: v_cmp_lt_i64_e64 s6, s[14:15], 0
 ; GFX10-NEXT: v_and_b32_e32 v0, 1, v0
-; GFX10-NEXT: v_mov_b32_e32 v6, s1
 ; GFX10-NEXT: v_mov_b32_e32 v7, s3
 ; GFX10-NEXT: v_cndmask_b32_e64 v2, 0, 1, s4
 ; GFX10-NEXT: s_and_b32 s4, 1, s12
 ; GFX10-NEXT: s_cmp_eq_u64 s[14:15], 0
-; GFX10-NEXT: v_cndmask_b32_e64 v3, 0, 1, s6
-; GFX10-NEXT: s_cselect_b32 s5, 1, 0
 ; GFX10-NEXT: v_cmp_ne_u32_e64 vcc_lo, 0, s4
+; GFX10-NEXT: s_cselect_b32 s5, 1, 0
+; GFX10-NEXT: v_cndmask_b32_e64 v3, 0, 1, s6
 ; GFX10-NEXT: s_and_b32 s5, 1, s5
-; GFX10-NEXT: v_cmp_ne_u32_e64 s4, 0, s5
+; GFX10-NEXT: s_ashr_i32 s4, s3, 31
 ; GFX10-NEXT: v_cndmask_b32_e32 v1, v2, v1, vcc_lo
+; GFX10-NEXT: v_cmp_eq_u32_e64 vcc_lo, 0, s5
+; GFX10-NEXT: s_add_i32 s0, s4, 0x80000000
+; GFX10-NEXT: v_cndmask_b32_e32 v2, 0, v3, vcc_lo
+; GFX10-NEXT: v_mov_b32_e32 v3, s8
 ; GFX10-NEXT: v_cmp_ne_u32_e32 vcc_lo, 0, v0
 ; GFX10-NEXT: v_mov_b32_e32 v0, s16
-; GFX10-NEXT: v_cndmask_b32_e64 v2, v3, 0, s4
-; GFX10-NEXT: v_mov_b32_e32 v3, s8
-; GFX10-NEXT: s_ashr_i32 s4, s3, 31
-; GFX10-NEXT: v_cndmask_b32_e64 v4, v4, s10, vcc_lo
-; GFX10-NEXT: v_cndmask_b32_e64 v0, v0, s10, vcc_lo
 ; GFX10-NEXT: v_xor_b32_e32 v1, v2, v1
 ; GFX10-NEXT: v_mov_b32_e32 v2, s17
 ; GFX10-NEXT: v_cndmask_b32_e64 v3, v3, s10, vcc_lo
-; GFX10-NEXT: s_add_i32 s0, s4, 0x80000000
-; GFX10-NEXT: v_readfirstlane_b32 s1, v4
+; GFX10-NEXT: v_cndmask_b32_e64 v4, v4, s10, vcc_lo
+; GFX10-NEXT: v_cndmask_b32_e64 v0, v0, s10, vcc_lo
 ; GFX10-NEXT: v_and_b32_e32 v1, 1, v1
 ; GFX10-NEXT: v_cndmask_b32_e64 v2, v2, s11, vcc_lo
+; GFX10-NEXT: v_readfirstlane_b32 s1, v4
 ; GFX10-NEXT: v_cmp_ne_u32_e32 vcc_lo, 0, v1
 ; GFX10-NEXT: v_mov_b32_e32 v1, s2
 ; GFX10-NEXT: v_readfirstlane_b32 s2, v0
@@ -6231,42 +6230,43 @@ define amdgpu_ps <2 x i128> @s_saddsat_v2i128(<2 x i128> inreg %lhs, <2 x i128>
 ; GFX11-NEXT: v_cndmask_b32_e64 v1, 0, 1, s0
 ; GFX11-NEXT: s_and_b32 s0, 1, s18
 ; GFX11-NEXT: s_cmp_eq_u64 s[10:11], 0
-; GFX11-NEXT: v_cndmask_b32_e64 v2, 0, 1, s2
-; GFX11-NEXT: s_cselect_b32 s1, 1, 0
 ; GFX11-NEXT: v_cmp_ne_u32_e64 vcc_lo, 0, s0
-; GFX11-NEXT: s_and_b32 s1, 1, s1
+; GFX11-NEXT: s_cselect_b32 s1, 1, 0
 ; GFX11-NEXT: s_ashr_i32 s10, s17, 31
-; GFX11-NEXT: v_cmp_ne_u32_e64 s0, 0, s1
-; GFX11-NEXT: s_add_i32 s11, s10, 0x80000000
+; GFX11-NEXT: s_and_b32 s1, 1, s1
+; GFX11-NEXT: v_cndmask_b32_e64 v2, 0, 1, s2
 ; GFX11-NEXT: v_cndmask_b32_e32 v0, v1, v0, vcc_lo
-; GFX11-NEXT: v_cndmask_b32_e64 v1, v2, 0, s0
+; GFX11-NEXT: v_cmp_eq_u32_e64 vcc_lo, 0, s1
+; GFX11-NEXT: s_add_i32 s11, s10, 0x80000000
 ; GFX11-NEXT: s_add_u32 s0, s4, s12
 ; GFX11-NEXT: s_addc_u32 s1, s5, s13
 ; GFX11-NEXT: s_addc_u32 s2, s6, s14
+; GFX11-NEXT: v_cndmask_b32_e32 v1, 0, v2, vcc_lo
 ; GFX11-NEXT: v_cmp_lt_u64_e64 s4, s[0:1], s[4:5]
 ; GFX11-NEXT: s_addc_u32 s3, s7, s15
+; GFX11-NEXT: v_dual_mov_b32 v6, s1 :: v_dual_mov_b32 v7, s3
 ; GFX11-NEXT: v_xor_b32_e32 v0, v1, v0
 ; GFX11-NEXT: s_cmp_eq_u64 s[2:3], s[6:7]
-; GFX11-NEXT: v_dual_mov_b32 v6, s1 :: v_dual_mov_b32 v7, s3
 ; GFX11-NEXT: v_cndmask_b32_e64 v1, 0, 1, s4
 ; GFX11-NEXT: v_cmp_lt_i64_e64 s4, s[2:3], s[6:7]
 ; GFX11-NEXT: s_cselect_b32 s12, 1, 0
 ; GFX11-NEXT: v_cmp_lt_i64_e64 s6, s[14:15], 0
-; GFX11-NEXT: v_dual_mov_b32 v5, s0 :: v_dual_and_b32 v0, 1, v0
+; GFX11-NEXT: v_mov_b32_e32 v5, s0
 ; GFX11-NEXT: v_cndmask_b32_e64 v2, 0, 1, s4
 ; GFX11-NEXT: s_and_b32 s4, 1, s12
 ; GFX11-NEXT: s_cmp_eq_u64 s[14:15], 0
-; GFX11-NEXT: v_cndmask_b32_e64 v3, 0, 1, s6
-; GFX11-NEXT: s_cselect_b32 s5, 1, 0
 ; GFX11-NEXT: v_cmp_ne_u32_e64 vcc_lo, 0, s4
+; GFX11-NEXT: s_cselect_b32 s5, 1, 0
+; GFX11-NEXT: v_and_b32_e32 v0, 1, v0
 ; GFX11-NEXT: s_and_b32 s5, 1, s5
-; GFX11-NEXT: v_cmp_ne_u32_e64 s4, 0, s5
+; GFX11-NEXT: v_cndmask_b32_e64 v3, 0, 1, s6
 ; GFX11-NEXT: v_cndmask_b32_e32 v1, v2, v1, vcc_lo
-; GFX11-NEXT: v_cndmask_b32_e64 v2, v3, 0, s4
-; GFX11-NEXT: v_mov_b32_e32 v3, s8
+; GFX11-NEXT: v_cmp_eq_u32_e64 vcc_lo, 0, s5
+; GFX11-NEXT: s_ashr_i32 s4, s3, 31
+; GFX11-NEXT: s_add_i32 s0, s4, 0x80000000
+; GFX11-NEXT: v_dual_cndmask_b32 v2, 0, v3 :: v_dual_mov_b32 v3, s8
 ; GFX11-NEXT: v_cmp_ne_u32_e32 vcc_lo, 0, v0
 ; GFX11-NEXT: v_mov_b32_e32 v0, s16
-; GFX11-NEXT: s_ashr_i32 s4, s3, 31
 ; GFX11-NEXT: v_xor_b32_e32 v1, v2, v1
 ; GFX11-NEXT: v_mov_b32_e32 v4, s9
 ; GFX11-NEXT: v_mov_b32_e32 v2, s17
@@ -6275,7 +6275,6 @@ define amdgpu_ps <2 x i128> @s_saddsat_v2i128(<2 x i128> inreg %lhs, <2 x i128>
 ; GFX11-NEXT: v_and_b32_e32 v1, 1, v1
 ; GFX11-NEXT: v_cndmask_b32_e64 v4, v4, s10, vcc_lo
 ; GFX11-NEXT: v_cndmask_b32_e64 v2, v2, s11, vcc_lo
-; GFX11-NEXT: s_add_i32 s0, s4, 0x80000000
 ; GFX11-NEXT: v_cmp_ne_u32_e32 vcc_lo, 0, v1
 ; GFX11-NEXT: v_mov_b32_e32 v1, s2
 ; GFX11-NEXT: v_readfirstlane_b32 s1, v4
diff --git a/llvm/test/CodeGen/AMDGPU/amdgpu-codegenprepare-fold-binop-select.ll b/llvm/test/CodeGen/AMDGPU/amdgpu-codegenprepare-fold-binop-select.ll
index 7fdc012d4f1b5..4d62e9bcc3bc3 100644
--- a/llvm/test/CodeGen/AMDGPU/amdgpu-codegenprepare-fold-binop-select.ll
+++ b/llvm/test/CodeGen/AMDGPU/amdgpu-codegenprepare-fold-binop-select.ll
@@ -213,10 +213,10 @@ define i32 @select_sdiv_lhs_opaque_const1_i32(i1 %cond) {
 ; GCN-NEXT: s_addc_u32 s5, s5, gv@gotpcrel32@hi+12
 ; GCN-NEXT: s_load_dword s4, s[4:5], 0x0
 ; GCN-NEXT: v_and_b32_e32 v0, 1, v0
-; GCN-NEXT: v_cmp_eq_u32_e32 vcc, 1, v0
+; GCN-NEXT: v_cmp_ne_u32_e32 vcc, 1, v0
 ; GCN-NEXT: s_waitcnt lgkmcnt(0)
 ; GCN-NEXT: v_mov_b32_e32 v1, s4
-; GCN-NEXT: v_cndmask_b32_e64 v0, v1, 5, vcc
+; GCN-NEXT: v_cndmask_b32_e32 v0, 5, v1, vcc
 ; GCN-NEXT: v_sub_u32_e32 v1, vcc, 0, v0
 ; GCN-NEXT: v_max_i32_e32 v1, v0, v1
 ; GCN-NEXT: v_cvt_f32_u32_e32 v2, v1
@@ -293,10 +293,10 @@ define i32 @select_sdiv_rhs_opaque_const1_i32(i1 %cond) {
 ; GCN-NEXT: s_load_dword s4, s[4:5], 0x0
 ; GCN-NEXT: v_and_b32_e32 v0, 1, v0
 ; GCN-NEXT: v_mov_b32_e32 v1, 0xa410
-; GCN-NEXT: v_cmp_eq_u32_e32 vcc, 1, v0
+; GCN-NEXT: v_cmp_ne_u32_e32 vcc, 1, v0
 ; GCN-NEXT: s_waitcnt lgkmcnt(0)
 ; GCN-NEXT: v_mov_b32_e32 v2, s4
-; GCN-NEXT: v_cndmask_b32_e32 v0, v2, v1, vcc
+; GCN-NEXT: v_cndmask_b32_e32 v0, v1, v2, vcc
 ; GCN-NEXT: s_mov_b32 s4, 0x30c30c31
 ; GCN-NEXT: v_mul_hi_i32 v0, v0, s4
 ; GCN-NEXT: v_lshrrev_b32_e32 v1, 31, v0
diff --git a/llvm/test/CodeGen/AMDGPU/ctlz.ll b/llvm/test/CodeGen/AMDGPU/ctlz.ll
index b4d450a90d595..179c0bd1fdfc8 100644
--- a/llvm/test/CodeGen/AMDGPU/ctlz.ll
+++ b/llvm/test/CodeGen/AMDGPU/ctlz.ll
@@ -1099,9 +1099,9 @@ define amdgpu_kernel void @v_ctlz_i32_sel_eq_neg1(ptr addrspace(1) noalias %out,
 ; GFX10-GISEL-NEXT: global_load_dword v0, v0, s[2:3]
 ; GFX10-GISEL-NEXT: s_waitcnt vmcnt(0)
 ; GFX10-GISEL-NEXT: v_ffbh_u32_e32 v1, v0
-; GFX10-GISEL-NEXT: v_cmp_eq_u32_e32 vcc_lo, 0, v0
+; GFX10-GISEL-NEXT: v_cmp_ne_u32_e32 vcc_lo, 0, v0
 ; GFX10-GISEL-NEXT: v_min_u32_e32 v1, 32, v1
-; GFX10-GISEL-NEXT: v_cndmask_b32_e64 v0, v1, -1, vcc_lo
+; GFX10-GISEL-NEXT: v_cndmask_b32_e32 v0, -1, v1, vcc_lo
 ; GFX10-GISEL-NEXT: v_mov_b32_e32 v1, 0
 ; GFX10-GISEL-NEXT: global_store_dword v1, v0, s[0:1]
 ; GFX10-GISEL-NEXT: s_endpgm
@@ -1327,8 +1327,8 @@ define amdgpu_kernel void @v_ctlz_i32_sel_eq_bitwidth(ptr addrspace(1) noalias %
 ; GFX10-GISEL-NEXT: s_waitcnt vmcnt(0)
 ; GFX10-GISEL-NEXT: v_ffbh_u32_e32 v0, v0
 ; GFX10-GISEL-NEXT: v_min_u32_e32 v0, 32, v0
-; GFX10-GISEL-NEXT: v_cmp_eq_u32_e32 vcc_lo, 32, v0
-; GFX10-GISEL-NEXT: v_cndmask_b32_e64 v0, v0, -1, vcc_lo
+; GFX10-GISEL-NEXT: v_cmp_ne_u32_e32 vcc_lo, 32, v0
+; GFX10-GISEL-NEXT: v_cndmask_b32_e32 v0, -1, v0, vcc_lo
 ; GFX10-GISEL-NEXT: global_store_dword v1, v0, s[0:1]
 ; GFX10-GISEL-NEXT: s_endpgm
 ;
@@ -1564,10 +1564,10 @@ define amdgpu_kernel void @v_ctlz_i32_sel_ne_bitwidth(ptr addrspace(1) noalias %
 ; GFX10-GISEL-NEXT: global_load_ubyte v0, v[0:1], off
 ; GFX10-GISEL-NEXT: s_waitcnt vmcnt(0)
 ; GFX10-GISEL-NEXT: v_ffbh_u32_e32 v1, v0
-; GFX10-GISEL-NEXT: v_cmp_eq_u32_e32 vcc_lo, 0, v0
+; GFX10-GISEL-NEXT: v_cmp_ne_u32_e32 vcc_lo, 0, v0
 ; GFX10-GISEL-NEXT: v_min_u32_e32 v1, 32, v1
 ; GFX10-GISEL-NEXT: v_add_nc_u16 v1, 0xffe8, v1
-; GFX10-GISEL-NEXT: v_cndmask_b32_e64 v0, v1, 0xffff, vcc_lo
+; GFX10-GISEL-NEXT: v_cndmask_b32_e32 v0, 0xffff, v1, vcc_lo
 ; GFX10-GISEL-NEXT: v_mov_b32_e32 v1, 0
 ; GFX10-GISEL-NEXT: global_store_byte v1, v0, s[0:1]
 ; GFX10-GISEL-NEXT: s_endpgm
@@ -1684,11 +1684,11 @@ define amdgpu_kernel void @v_ctlz_i32_sel_ne_bitwidth(ptr addrspace(1) noalias %
 ; GFX10-GISEL-NEXT: global_load_ushort v1, v0, s[2:3]
 ; GFX10-GISEL-NEXT: s_waitcnt vmcnt(0)
 ; GFX10-GISEL-NEXT: v_ffbh_u32_e32 v2, v1
-; GFX10-GISEL-NEXT: v_cmp_eq_u32_e32 vcc_lo, 0, v1
+; GFX10-GISEL-NEXT: v_cmp_ne_u32_e32 vcc_lo, 0, v1
 ; GFX10-GISEL-NEXT: v_min_u32_e32 v2, 32, v2
 ; GFX10-GISEL-NEXT: v_add_nc_u32_e32 v2, -16, v2
 ; GFX10-GISEL-NEXT: v_and_b32_e32 v2, 0xffff, v2
-; GFX10-GISEL-NEXT: v_cndmask_b32_e64 v1, v2, 0xffff, vcc_lo
+; GFX10-GISEL-NEXT: v_cndmask_b32_e32 v1, 0xffff, v2, vcc_lo
 ; GFX10-GISEL-NEXT: global_store_short v0, v1, s[0:1]
 ; GFX10-GISEL-NEXT: s_endpgm
 ;
@@ -1805,10 +1805,10 @@ define amdgpu_kernel void @v_ctlz_i7_sel_eq_neg1(ptr addrspace(1) noalias %out,
 ; GFX10-GISEL-NEXT: s_waitcnt vmcnt(0)
 ; GFX10-GISEL-NEXT: v_and_b32_e32 v0, 0x7f, v0
 ; GFX10-GISEL-NEXT: v_ffbh_u32_e32 v1, v0
-; GFX10-GISEL-NEXT: v_cmp_eq_u32_e32 vcc_lo, 0, v0
+; GFX10-GISEL-NEXT: v_cmp_ne_u32_e32 vcc_lo, 0, v0
 ; GFX10-GISEL-NEXT: v_min_u32_e32 v1, 32, v1
 ; GFX10-GISEL-NEXT: v_add_nc_u16 v1, 0xffe7, v1
-; GFX10-GISEL-NEXT: v_cndmask_b32_e64 v0, v1, 0x7f, vcc_lo
+; GFX10-GISEL-NEXT: v_cndmask_b32_e32 v0, 0x7f, v1, vcc_lo
 ; GFX10-GISEL-NEXT: v_mov_b32_e32 v1, 0
 ; GFX10-GISEL-NEXT: v_and_b32_e32 v0, 0x7f, v0
 ; GFX10-GISEL-NEXT: global_store_byte v1, v0, s[0:1]
diff --git a/llvm/test/CodeGen/AMDGPU/ctlz_zero_undef.ll b/llvm/test/CodeGen/AMDGPU/ctlz_zero_undef.ll
index 65ee228b64c6a..f4bbba6246b65 100644
--- a/llvm/test/CodeGen/AMDGPU/ctlz_zero_undef.ll
+++ b/llvm/test/CodeGen/AMDGPU/ctlz_zero_undef.ll
@@ -1542,8 +1542,8 @@ define amdgpu_kernel void @v_ctlz_zero_undef_i32_sel_eq_neg1(ptr addrspace(1) no
 ; GFX9-GISEL-NEXT: global_load_dword v0, v0, s[2:3]
 ; GFX9-GISEL-NEXT: s_waitcnt vmcnt(0)
 ; GFX9-GISEL-NEXT: v_ffbh_u32_e32 v1, v0
-; GFX9-GISEL-NEXT: v_cmp_eq_u32_e32 vcc, 0, v0
-; GFX9-GISEL-NEXT: v_cndmask_b32_e64 v0, v1, -1, vcc
+; GFX9-GISEL-NEXT: v_cmp_ne_u32_e32 vcc, 0, v0
+; GFX9-GISEL-NEXT: v_cndmask_b32_e32 v0, -1, v1, vcc
 ; GFX9-GISEL-NEXT: v_mov_b32_e32 v1, 0
 ; GFX9-GISEL-NEXT: global_store_dword v1, v0, s[0:1]
 ; GFX9-GISEL-NEXT: s_endpgm
@@ -1712,8 +1712,8 @@ define amdgpu_kernel void @v_ctlz_zero_undef_i8_sel_eq_neg1(ptr addrspace(1) noa
 ; GFX9-GISEL-NEXT: s_waitcnt vmcnt(0)
 ; GFX9-GISEL-NEXT: v_lshlrev_b32_e32 v3, 24, v0
 ; GFX9-GISEL-NEXT: v_ffbh_u32_e32 v3, v3
-; GFX9-GISEL-NEXT: v_cmp_eq_u32_sdwa vcc, v0, v1 src0_sel:BYTE_0 src1_sel:DWORD
-; GFX9-GISEL-NEXT: v_cndmask_b32_e32 v0, v3, v2, vcc
+; GFX9-GISEL-NEXT: v_cmp_ne_u32_sdwa vcc, v0, v1 src0_sel:BYTE_0 src1_sel:DWORD
+; GFX9-GISEL-NEXT: v_cndmask_b32_e32 v0, v2, v3, vcc
 ; GFX9-GISEL-NEXT: global_store_byte v1, v0, s[0:1]
 ; GFX9-GISEL-NEXT: s_endpgm
 %tid = call i32 @llvm.amdgcn.workitem.id.x()
@@ -1809,9 +1809,9 @@ define amdgpu_kernel void @v_ctlz_zero_undef_i32_sel_eq_neg1_two_use(ptr addrspa
 ; GFX9-GISEL-NEXT: global_load_dword v0, v0, s[2:3]
 ; GFX9-GISEL-NEXT: s_waitcnt vmcnt(0)
 ; GFX9-GISEL-NEXT: v_ffbh_u32_e32 v2, v0
-; GFX9-GISEL-NEXT: v_cmp_eq_u32_e32 vcc, 0, v0
-; GFX9-GISEL-NEXT: v_cndmask_b32_e64 v0, v2, -1, vcc
-; GFX9-GISEL-NEXT: v_cndmask_b32_e64 v2, 0, 1, vcc
+; GFX9-GISEL-NEXT: v_cmp_ne_u32_e32 vcc, 0, v0
+; GFX9-GISEL-NEXT: v_cndmask_b32_e32 v0, -1, v2, vcc
+; GFX9-GISEL-NEXT: v_cndmask_b32_e64 v2, 1, 0, vcc
 ; GFX9-GISEL-NEXT: global_store_dword v1, v0, s[0:1]
 ; GFX9-GISEL-NEXT: s_waitcnt vmcnt(0)
 ; GFX9-GISEL-NEXT: global_store_byte v[0:1], v2, off
@@ -1897,8 +1897,8 @@ define amdgpu_kernel void @v_ctlz_zero_undef_i32_sel_eq_0(ptr addrspace(1) noali
 ; GFX9-GISEL-NEXT: global_load_dword v0, v0, s[2:3]
 ; GFX9-GISEL-NEXT: s_waitcnt vmcnt(0)
 ; GFX9-GISEL-NEXT: v_ffbh_u32_e32 v1, v0
-; GFX9-GISEL-NEXT: v_cmp_eq_u32_e32 vcc, 0, v0
-; GFX9-GISEL-NEXT: v_cndmask_b32_e64 v0, v1, 0, vcc
+; GFX9-GISEL-NEXT: v_cmp_ne_u32_e32 vcc, 0, v0
+; GFX9-GISEL-NEXT: v_cndmask_b32_e32 v0, 0, v1, vcc
 ; GFX9-GISEL-NEXT: v_mov_b32_e32 v1, 0
 ; GFX9-GISEL-NEXT: global_store_dword v1, v0, s[0:1]
 ; GFX9-GISEL-NEXT: s_endpgm
@@ -2066,8 +2066,8 @@ define amdgpu_kernel void @v_ctlz_zero_undef_i32_sel_eq_cmp_non0(ptr addrspace(1
 ; GFX9-GISEL-NEXT: global_load_dword v0, v0, s[2:3]
 ; GFX9-GISEL-NEXT: s_waitcnt vmcnt(0)
 ; GFX9-GISEL-NEXT: v_ffbh_u32_e32 v1, v0
-; GFX9-GISEL-NEXT: v_cmp_eq_u32_e32 vcc, 1, v0
-; GFX9-GISEL-NEXT: v_cndmask_b32_e64 v0, v1, 0, vcc
+; GFX9-GISEL-NEXT: v_cmp_ne_u32_e32 vcc, 1, v0
+; GFX9-GISEL-NEXT: v_cndmask_b32_e32 v0, 0, v1, vcc
 ; GFX9-GISEL-NEXT: v_mov_b32_e32 v1, 0
 ; GFX9-GISEL-NEXT: global_store_dword v1, v0, s[0:1]
 ; GFX9-GISEL-NEXT: s_endpgm
diff --git a/llvm/test/CodeGen/AMDGPU/cttz.ll b/llvm/test/CodeGen/AMDGPU/cttz.ll
index f0c278a67c8bc..1bd640135a642 100644
--- a/llvm/test/CodeGen/AMDGPU/cttz.ll
+++ b/llvm/test/CodeGen/AMDGPU/cttz.ll
@@ -951,9 +951,9 @@ define amdgpu_kernel void @v_cttz_i32_sel_eq_neg1(ptr addrspace(1) noalias %out,
 ; GFX10-GISEL-NEXT: global_load_dword v0, v0, s[2:3]
 ; GFX10-GISEL-NEXT: s_waitcnt vmcnt(0)
 ; GFX10-GISEL-NEXT: v_ffbl_b32_e32 v1, v0
-; GFX10-GISEL-NEXT: v_cmp_eq_u32_e32 vcc_lo, 0, v0
+; GFX10-GISEL-NEXT: v_cmp_ne_u32_e32 vcc_lo, 0, v0
 ; GFX10-GISEL-NEXT: v_min_u32_e32 v1, 32, v1
-; GFX10-GISEL-NEXT: v_cndmask_b32_e64 v0, v1, -1, vcc_lo
+; GFX10-GISEL-NEXT: v_cndmask_b32_e32 v0, -1, v1, vcc_lo
 ; GFX10-GISEL-NEXT: v_mov_b32_e32 v1, 0
 ; GFX10-GISEL-NEXT: global_store_dword v1, v0, s[0:1]
 ; GFX10-GISEL-NEXT: s_endpgm
@@ -1153,8 +1153,8 @@ define amdgpu_kernel void @v_cttz_i32_sel_eq_bitwidth(ptr addrspace(1) noalias %
 ; GFX10-GISEL-NEXT: s_waitcnt vmcnt(0)
 ; GFX10-GISEL-NEXT: v_ffbl_b32_e32 v0, v0
 ; GFX10-GISEL-NEXT: v_min_u32_e32 v0, 32, v0
-; GFX10-GISEL-NEXT: v_cmp_eq_u32_e32 vcc_lo, 32, v0
-; GFX10-GISEL-NEXT: v_cndmask_b32_e64 v0, v0, -1, vcc_lo
+; GFX10-GISEL-NEXT: v_cmp_ne_u32_e32 vcc_lo, 32, v0
+; GFX10-GISEL-NEXT: v_cndmask_b32_e32 v0, -1, v0, vcc_lo
 ; GFX10-GISEL-NEXT: global_store_dword v1, v0, s[0:1]
 ; GFX10-GISEL-NEXT: s_endpgm
 %tid = call i32 @llvm.amdgcn.workitem.id.x()
@@ -1357,9 +1357,9 @@ define amdgpu_kernel void @v_cttz_i32_sel_ne_bitwidth(ptr addrspace(1) noalias %
 ; GFX10-GISEL-NEXT: global_load_ubyte v0, v[0:1], off
 ; GFX10-GISEL-NEXT: s_waitcnt vmcnt(0)
 ; GFX10-GISEL-NEXT: v_or_b32_e32 v1, 0x100, v0
-; GFX10-GISEL-NEXT: v_cmp_eq_u32_sdwa s2, v0, v2 src0_sel:BYTE_0 src1_sel:DWORD
+; GFX10-GISEL-NEXT: v_cmp_ne_u32_sdwa vcc_lo, v0, v2 src0_sel:BYTE_0 src1_sel:DWORD
 ; GFX10-GISEL-NEXT: v_ffbl_b32_e32 v1, v1
-; GFX10-GISEL-NEXT: v_cndmask_b32_e64 v0, v1, 0xffff, s2
+; GFX10-GISEL-NEXT: v_cndmask_b32_e32 v0, 0xffff, v1, vcc_lo
 ; GFX10-GISEL-NEXT: global_store_byte v2, v0, s[0:1]
 ; GFX10-GISEL-NEXT: s_endpgm
 %tid = call i32 @llvm.amdgcn.workitem.id.x()
@@ -1463,10 +1463,10 @@ define amdgpu_kernel void @v_cttz_i32_sel_ne_bitwidth(ptr addrspace(1) noalias %
 ; GFX10-GISEL-NEXT: global_load_ushort v1, v0, s[2:3]
 ; GFX10-GISEL-NEXT: s_waitcnt vmcnt(0)
 ; GFX10-GISEL-NEXT: v_or_b32_e32 v2, 0x10000, v1
-; GFX10-GISEL-NEXT: v_cmp_eq_u32_e32 vcc_lo, 0, v1
+; GFX10-GISEL-NEXT: v_cmp_ne_u32_e32 vcc_lo, 0, v1
 ; GFX10-GISEL-NEXT: v_ffbl_b32_e32 v2, v2
 ; GFX10-GISEL-NEXT: v_and_b32_e32 v2, 0xffff, v2
-; GFX10-GISEL-NEXT: v_cndmask_b32_e64 v1, v2, 0xffff, vcc_lo
+; GFX10-GISEL-NEXT: v_cndmask_b32_e32 v1, 0xffff, v2, vcc_lo
 ; GFX10-GISEL-NEXT: global_store_short v0, v1, s[0:1]
 ; GFX10-GISEL-NEXT: s_endpgm
 %val = load i16, ptr addrspace(1) %valptr
@@ -1567,8 +1567,8 @@ define amdgpu_kernel void @v_cttz_i7_sel_eq_neg1(ptr addrspace(1) noalias %out,
 ; GFX10-GISEL-NEXT: v_or_b32_e32 v1, 0x80, v0
 ; GFX10-GISEL-NEXT: v_and_b32_e32 v0, 0x7f, v0
 ; GFX10-GISEL-NEXT: v_ffbl_b32_e32 v1, v1
-; GFX10-GISEL-NEXT: v_cmp_eq_u32_e32 vcc_lo, 0, v0
-; GFX10-GISEL-NEXT: v_cndmask_b32_e64 v0, v1, 0x7f, vcc_lo
+; GFX10-GISEL-NEXT: v_cmp_ne_u32_e32 vcc_lo, 0, v0
+; GFX10-GISEL-NEXT: v_cndmask_b32_e32 v0, 0x7f, v1, vcc_lo
 ; GFX10-GISEL-NEXT: v_mov_b32_e32 v1, 0
 ; GFX10-GISEL-NEXT: v_and_b32_e32 v0, 0x7f, v0
 ; GFX10-GISEL-NEXT: global_store_byte v1, v0, s[0:1]
diff --git a/llvm/test/CodeGen/AMDGPU/cttz_zero_undef.ll b/llvm/test/CodeGen/AMDGPU/cttz_zero_undef.ll
index 777f363fedf9a..06c61d599dea9 100644
--- a/llvm/test/CodeGen/AMDGPU/cttz_zero_undef.ll
+++ b/llvm/test/CodeGen/AMDGPU/cttz_zero_undef.ll
@@ -1170,8 +1170,8 @@ define amdgpu_kernel void @v_cttz_i32_sel_eq_neg1(ptr addrspace(1) noalias %out,
 ; GFX9-GISEL-NEXT: v_or3_b32 v1, v2, v3, v1
 ; GFX9-GISEL-NEXT: v_ffbl_b32_e32 v2, v1
 ; GFX9-GISEL-NEXT: v_min_u32_e32 v2, 32, v2
-; GFX9-GISEL-NEXT: v_cmp_eq_u32_e32 vcc, 0, v1
-; GFX9-GISEL-NEXT: v_cndmask_b32_e64 v1, v2, -1, vcc
+; GFX9-GISEL-NEXT: v_cmp_ne_u32_e32 vcc, 0, v1
+; GFX9-GISEL-NEXT: v_cndmask_b32_e32 v1, -1, v2, vcc
 ; GFX9-GISEL-NEXT: global_store_dword v0, v1, s[0:1]
 ; GFX9-GISEL-NEXT: s_endpgm
 %val = load i32, ptr addrspace(1) %arrayidx, align 1
@@ -1510,8 +1510,8 @@ define amdgpu_kernel void @v_cttz_i32_sel_ne_bitwidth(ptr addrspace(1) noalias %
 ; GFX9-GISEL-NEXT: v_or_b32_e32 v3, 0x100, v1
 ; GFX9-GISEL-NEXT: v_ffbl_b32_e32 v3, v3
 ; GFX9-GISEL-NEXT: v_and_b32_e32 v3, 0xff, v3
-; GFX9-GISEL-NEXT: v_cmp_eq_u32_e32 vcc, 0, v1
-; GFX9-GISEL-NEXT: v_cndmask_b32_e32 v1, v3, v2, vcc
+; GFX9-GISEL-NEXT: v_cmp_ne_u32_e32 vcc, 0, v1
+; GFX9-GISEL-NEXT: v_cndmask_b32_e32 v1, v2, v3, vcc
 ; GFX9-GISEL-NEXT: global_store_byte v0, v1, s[0:1]
 ; GFX9-GISEL-NEXT: s_endpgm
 %val = load i8, ptr addrspace(1) %arrayidx, align 1
@@ -1612,8 +1612,8 @@ define amdgpu_kernel void @v_cttz_i32_sel_ne_bitwidth(ptr addrspace(1) noalias %
 ; GFX9-GISEL-NEXT: v_or_b32_e32 v2, 0x10000, v1
 ; GFX9-GISEL-NEXT: v_ffbl_b32_e32 v2, v2
 ; GFX9-GISEL-NEXT: v_and_b32_e32 v2, 0xffff, v2
-; GFX9-GISEL-NEXT: v_cmp_eq_u32_e32 vcc, 0, v1
-; GFX9-GISEL-NEXT: v_cndmask_b32_e32 v1, v2, v3, vcc
+; GFX9-GISEL-NEXT: v_cmp_ne_u32_e32 vcc, 0, v1
+; GFX9-GISEL-NEXT: v_cndmask_b32_e32 v1, v3, v2, vcc
 ; GFX9-GISEL-NEXT: global_store_short v0, v1, s[0:1]
 ; GFX9-GISEL-NEXT: s_endpgm
 %val = load i16, ptr addrspace(1) %arrayidx, align 1
diff --git a/llvm/test/CodeGen/AMDGPU/fneg-combines.new.ll b/llvm/test/CodeGen/AMDGPU/fneg-combines.new.ll
index 1b471166b5d29..7f51cbec5dc4e 100644
--- a/llvm/test/CodeGen/AMDGPU/fneg-combines.new.ll
+++ b/llvm/test/CodeGen/AMDGPU/fneg-combines.new.ll
@@ -3066,9 +3066,8 @@ define double @v_fneg_select_infloop_regression_f64(double %arg, i1 %arg1) {
 ; GCN-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
 ; GCN-NEXT: v_and_b32_e32 v2, 1, v2
 ; GCN-NEXT: v_bfrev_b32_e32 v3, 1
-; GCN-NEXT: v_cmp_eq_u32_e32 vcc, 1, v2
-; GCN-NEXT: v_cndmask_b32_e64 v1, -v1, v3, vcc
 ; GCN-NEXT: v_cmp_ne_u32_e32 vcc, 1, v2
+; GCN-NEXT: v_cndmask_b32_e64 v1, v3, -v1, vcc
 ; GCN-NEXT: v_cndmask_b32_e32 v0, 0, v0, vcc
 ; GCN-NEXT: v_cndmask_b32_e32 v1, 0, v1, vcc
 ; GCN-NEXT: s_setpc_b64 s[30:31]
diff --git a/llvm/test/CodeGen/AMDGPU/insert_vector_dynelt.ll b/llvm/test/CodeGen/AMDGPU/insert_vector_dynelt.ll
index e649c3034f35b..46c868b4559b4 100644
--- a/llvm/test/CodeGen/AMDGPU/insert_vector_dynelt.ll
+++ b/llvm/test/CodeGen/AMDGPU/insert_vector_dynelt.ll
@@ -1920,10 +1920,9 @@ define <8 x double> @double8_inselt_vec(<8 x double> %vec, i32 %sel) {
 ; GCN-LABEL: double8_inselt_vec:
 ; GCN: ; %bb.0: ; %entry
 ; GCN-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; GCN-NEXT: v_cmp_eq_u32_e32 vcc, 0, v16
-; GCN-NEXT: v_cndmask_b32_e64 v0, v0, 0, vcc
-; GCN-NEXT: v_mov_b32_e32 v17, 0x3ff00000
 ; GCN-NEXT: v_cmp_ne_u32_e32 vcc, 0, v16
+; GCN-NEXT: v_mov_b32_e32 v17, 0x3ff00000
+; GCN-NEXT: v_cndmask_b32_e32 v0, 0, v0, vcc
 ; GCN-NEXT: v_cndmask_b32_e32 v1, v17, v1, vcc
 ; GCN-NEXT: v_cmp_ne_u32_e32 vcc, 1, v16
 ; GCN-NEXT: v_cndmask_b32_e32 v2, 0, v2, vcc
diff --git a/llvm/test/CodeGen/AMDGPU/private-memory-atomics.ll b/llvm/test/CodeGen/AMDGPU/private-memory-atomics.ll
index 24a4d8fbde200..b0b875c2b11a1 100644
--- a/llvm/test/CodeGen/AMDGPU/private-memory-atomics.ll
+++ b/llvm/test/CodeGen/AMDGPU/private-memory-atomics.ll
@@ -123,10 +123,10 @@ define i32 @cmpxchg_private_i32(ptr addrspace(5) %ptr) {
 ; GCN-NEXT: s_mov_b32 s7, 0xf000
 ; GCN-NEXT: s_mov_b32 s6, -1
 ; GCN-NEXT: s_waitcnt vmcnt(0)
-; GCN-NEXT: v_cmp_eq_u32_e32 vcc, 0, v1
-; GCN-NEXT: v_cndmask_b32_e64 v2, v1, 1, vcc
+; GCN-NEXT: v_cmp_ne_u32_e32 vcc, 0, v1
+; GCN-NEXT: v_cndmask_b32_e32 v2, 1, v1, vcc
 ; GCN-NEXT: buffer_store_dword v2, v0, s[0:3], 0 offen
-; GCN-NEXT: v_cndmask_b32_e64 v0, 0, 1, vcc
+; GCN-NEXT: v_cndmask_b32_e64 v0, 1, 0, vcc
 ; GCN-NEXT: buffer_store_byte v0, off, s[4:7], 0
 ; GCN-NEXT: s_waitcnt expcnt(0)
 ; GCN-NEXT: v_mov_b32_e32 v0, v1

From 3d1ae88e0555d0c1544f43904b85ca3aa4da4e89 Mon Sep 17 00:00:00 2001
From: Ana Mihajlovic
Date: Wed, 9 Apr 2025 17:48:46 +0200
Subject: [PATCH 4/4] added float instructions

---
 llvm/lib/Target/AMDGPU/SIFoldOperands.cpp | 42 +-
 llvm/test/CodeGen/AMDGPU/GlobalISel/fpow.ll | 270 +-
 .../CodeGen/AMDGPU/GlobalISel/llvm.powi.ll | 18 +-
 .../AMDGPU/GlobalISel/select-to-fmin-fmax.ll | 28 +-
 .../amdgpu-simplify-libcall-pow-codegen.ll | 3456 +++++++++++++----
 .../AMDGPU/copysign-simplify-demanded-bits.ll | 6 +-
 llvm/test/CodeGen/AMDGPU/fdiv_flags.f32.ll | 6 +-
 llvm/test/CodeGen/AMDGPU/fsqrt.f32.ll | 44 +-
 llvm/test/CodeGen/AMDGPU/llvm.exp.ll | 684 ++--
 llvm/test/CodeGen/AMDGPU/llvm.exp10.ll | 684 ++--
 10 files changed, 3595 insertions(+), 1643 deletions(-)

diff --git a/llvm/lib/Target/AMDGPU/SIFoldOperands.cpp b/llvm/lib/Target/AMDGPU/SIFoldOperands.cpp
index 1ec0ab9b5bd9c..02f5b88e2d7a0 100644
--- a/llvm/lib/Target/AMDGPU/SIFoldOperands.cpp
+++ b/llvm/lib/Target/AMDGPU/SIFoldOperands.cpp
@@ -107,6 +107,7 @@ class SIFoldOperandsImpl {
 
   unsigned getInverseCompareOpcode(MachineInstr &MI) const {
     switch (MI.getOpcode()) {
+    // unsigned 32
     case AMDGPU::V_CMP_EQ_U32_e64:
      return AMDGPU::V_CMP_NE_U32_e64;
    case AMDGPU::V_CMP_NE_U32_e64:
@@ -119,19 +120,19 @@ class SIFoldOperandsImpl {
       return AMDGPU::V_CMP_LE_U32_e64;
     case AMDGPU::V_CMP_LT_U32_e64:
       return AMDGPU::V_CMP_GE_U32_e64;
-
-    // case AMDGPU::V_CMP_EQ_U32_e64:
-    //   return AMDGPU::V_CMP_NE_U32_e64;
-    // case AMDGPU::V_CMP_NE_U32_e64:
-    //   return AMDGPU::V_CMP_EQ_U32_e64;
-    // case AMDGPU::V_CMP_GE_U32_e64:
-    //   return AMDGPU::V_CMP_LT_U32_e64;
-    // case AMDGPU::V_CMP_LE_U32_e64:
-    //   return AMDGPU::V_CMP_GT_U32_e64;
-    // case AMDGPU::V_CMP_GT_U32_e64:
-    //   return AMDGPU::V_CMP_LE_U32_e64;
-    // case AMDGPU::V_CMP_LT_U32_e64:
-    //   return AMDGPU::V_CMP_GE_U32_e64;
+    // float 32
+    case AMDGPU::V_CMP_EQ_F32_e64:
+      return AMDGPU::V_CMP_NEQ_F32_e64;
+    case AMDGPU::V_CMP_NEQ_F32_e64:
+      return AMDGPU::V_CMP_EQ_F32_e64;
+    case AMDGPU::V_CMP_GE_F32_e64:
+      return AMDGPU::V_CMP_LT_F32_e64;
+    case AMDGPU::V_CMP_LE_F32_e64:
+      return AMDGPU::V_CMP_GT_F32_e64;
+    case AMDGPU::V_CMP_GT_F32_e64:
+      return AMDGPU::V_CMP_LE_F32_e64;
+    case AMDGPU::V_CMP_LT_F32_e64:
+      return AMDGPU::V_CMP_GE_F32_e64;
     default:
       return 0;
     }
@@ -139,7 +140,6 @@ class SIFoldOperandsImpl {
 
   bool foldCopyToVGPROfScalarAddOfFrameIndex(Register DstReg, Register SrcReg,
                                              MachineInstr &MI) const;
-
   bool updateOperand(FoldCandidate &Fold) const;
 
@@ -1541,11 +1541,17 @@ bool SIFoldOperandsImpl::tryFoldCndMask(MachineInstr &MI, Register *RegVCC,
           auto cmpDL = DefMI->getDebugLoc();
           *NewVCC = MRI->createVirtualRegister(MRI->getRegClass(Reg));
           *RegVCC = Reg;
-          MachineInstrBuilder inverseCompare = BuildMI(
+          MachineInstrBuilder InverseCompare = BuildMI(
               *DefMI->getParent(), DefMI, cmpDL, TII->get(Opcode), *NewVCC);
-
-          inverseCompare.add(DefMI->getOperand(1));
-          inverseCompare.add(DefMI->getOperand(2));
+          InverseCompare->setFlags(DefMI->getFlags());
+
+          unsigned OpNum = DefMI->getNumExplicitOperands();
+          for (unsigned i = 1; i < OpNum; i++) {
+            MachineOperand Op = DefMI->getOperand(i);
+            InverseCompare.add(Op);
+            if (Op.isReg() && Op.isKill())
+              InverseCompare->getOperand(i).setIsKill(false);
+          }
         }
       }
     }
diff --git a/llvm/test/CodeGen/AMDGPU/GlobalISel/fpow.ll b/llvm/test/CodeGen/AMDGPU/GlobalISel/fpow.ll
index 99261cc269858..297541203a9de 100644
--- a/llvm/test/CodeGen/AMDGPU/GlobalISel/fpow.ll
+++ b/llvm/test/CodeGen/AMDGPU/GlobalISel/fpow.ll
@@ -11,13 +11,13 @@ define float @v_pow_f32(float %x, float %y) {
 ; GFX6: ; %bb.0:
 ; GFX6-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
 ; GFX6-NEXT: v_mov_b32_e32 v2, 0x800000
-; GFX6-NEXT: v_cmp_lt_f32_e32 vcc, v0, v2
-; GFX6-NEXT: v_cndmask_b32_e64 v2, 0, 1, vcc
+; GFX6-NEXT: v_cmp_ge_f32_e32 vcc, v0, v2
+; GFX6-NEXT: v_cndmask_b32_e64 v2, 1, 0, vcc
 ; GFX6-NEXT: v_lshlrev_b32_e32 v2, 5, v2
 ; GFX6-NEXT: v_ldexp_f32_e32 v0, v0, v2
 ; GFX6-NEXT: v_log_f32_e32 v0, v0
 ; GFX6-NEXT: v_mov_b32_e32 v2, 0x42000000
-; GFX6-NEXT: v_cndmask_b32_e32 v2, 0, v2, vcc
+; GFX6-NEXT: v_cndmask_b32_e64 v2, v2, 0, vcc
 ; GFX6-NEXT: v_sub_f32_e32 v0, v0, v2
 ; GFX6-NEXT: v_mul_legacy_f32_e32 v0, v0, v1
 ; GFX6-NEXT: v_mov_b32_e32 v1, 0xc2fc0000
@@ -35,13 +35,13 @@ define float @v_pow_f32(float %x, float %y) {
 ; GFX8: ; %bb.0:
 ; GFX8-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
 ; GFX8-NEXT: v_mov_b32_e32 v2, 0x800000
-; GFX8-NEXT: v_cmp_lt_f32_e32 vcc, v0, v2
-; GFX8-NEXT: v_cndmask_b32_e64 v2, 0, 1, vcc
+; GFX8-NEXT: v_cmp_ge_f32_e32 vcc, v0, v2
+; GFX8-NEXT: v_cndmask_b32_e64 v2, 1, 0, vcc
 ; GFX8-NEXT: v_lshlrev_b32_e32 v2, 5, v2
 ; GFX8-NEXT: v_ldexp_f32 v0, v0, v2
 ; GFX8-NEXT: v_log_f32_e32 v0, v0
 ; GFX8-NEXT: v_mov_b32_e32 v2, 0x42000000
-; GFX8-NEXT: v_cndmask_b32_e32 v2, 0, v2, vcc
+; GFX8-NEXT: v_cndmask_b32_e64 v2, v2, 0, vcc
 ; GFX8-NEXT: v_sub_f32_e32 v0, v0, v2
 ; GFX8-NEXT: v_mul_legacy_f32_e32 v0, v0, v1
 ; GFX8-NEXT: v_mov_b32_e32 v1, 0xc2fc0000
@@ -59,13 +59,13 @@ define float @v_pow_f32(float %x, float %y) {
 ; GFX9: ; %bb.0:
 ; GFX9-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
 ; GFX9-NEXT: v_mov_b32_e32 v2, 0x800000
-; GFX9-NEXT: v_cmp_lt_f32_e32 vcc, v0, v2
-; GFX9-NEXT: v_cndmask_b32_e64 v2, 0, 1, vcc
+; GFX9-NEXT: v_cmp_ge_f32_e32 vcc, v0, v2
+; GFX9-NEXT: v_cndmask_b32_e64 v2, 1, 0, vcc
 ; GFX9-NEXT: v_lshlrev_b32_e32 v2, 5, v2
 ; GFX9-NEXT: v_ldexp_f32 v0, v0, v2
 ; GFX9-NEXT: v_log_f32_e32 v0, v0
 ; GFX9-NEXT: v_mov_b32_e32 v2, 0x42000000
-; GFX9-NEXT: v_cndmask_b32_e32 v2, 0, v2, vcc
+; GFX9-NEXT: v_cndmask_b32_e64 v2, v2, 0, vcc
 ; GFX9-NEXT: v_sub_f32_e32 v0, v0, v2
 ; GFX9-NEXT: v_mul_legacy_f32_e32 v0, v0, v1
 ; GFX9-NEXT: v_mov_b32_e32 v1, 0xc2fc0000
@@ -82,11 +82,11 @@ define float @v_pow_f32(float %x, float %y) {
 ; GFX10-LABEL: v_pow_f32:
 ; GFX10: ; %bb.0:
 ; GFX10-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; GFX10-NEXT: v_cmp_gt_f32_e32 vcc_lo, 0x800000, v0
-; GFX10-NEXT: v_cndmask_b32_e64 v2, 0, 1, vcc_lo
+; GFX10-NEXT: v_cmp_le_f32_e32 vcc_lo, 0x800000, v0
+; GFX10-NEXT: v_cndmask_b32_e64 v2, 1, 0, vcc_lo
 ; GFX10-NEXT: v_lshlrev_b32_e32 v2, 5, v2
 ; GFX10-NEXT: v_ldexp_f32 v0, v0, v2
-; GFX10-NEXT: v_cndmask_b32_e64 v2, 0, 0x42000000, vcc_lo
+; GFX10-NEXT: v_cndmask_b32_e64 v2, 0x42000000, 0, vcc_lo
 ; GFX10-NEXT: v_log_f32_e32 v0, v0
 ; GFX10-NEXT: v_sub_f32_e32 v0, v0, v2
 ; GFX10-NEXT: v_mul_legacy_f32_e32 v0, v0, v1
@@ -101,12 +101,12 @@ define float @v_pow_f32(float %x, float %y) {
 ; GFX11-LABEL: v_pow_f32:
 ; GFX11: ; %bb.0:
 ; GFX11-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; GFX11-NEXT: v_cmp_gt_f32_e32 vcc_lo, 0x800000, v0
-; GFX11-NEXT: v_cndmask_b32_e64 v2, 0, 1, vcc_lo
+; GFX11-NEXT: v_cmp_le_f32_e32 vcc_lo, 0x800000, v0
+; GFX11-NEXT: v_cndmask_b32_e64 v2, 1, 0, vcc_lo
 ; GFX11-NEXT: s_delay_alu instid0(VALU_DEP_1) | instskip(NEXT) | instid1(VALU_DEP_1)
 ; GFX11-NEXT: v_lshlrev_b32_e32 v2, 5, v2
 ; GFX11-NEXT: v_ldexp_f32 v0, v0, v2
-; GFX11-NEXT: v_cndmask_b32_e64 v2, 0, 0x42000000, vcc_lo
+; GFX11-NEXT: v_cndmask_b32_e64 v2, 0x42000000, 0, vcc_lo
 ; GFX11-NEXT: s_delay_alu instid0(VALU_DEP_2) | instskip(SKIP_2) | instid1(VALU_DEP_1)
 ; GFX11-NEXT: v_log_f32_e32 v0, v0
 ; GFX11-NEXT: s_waitcnt_depctr 0xfff
@@ -1004,13 +1004,13 @@ define float @v_pow_f32_fabs_lhs(float %x, float %y) {
 ; GFX6: ; %bb.0:
 ; GFX6-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
 ; GFX6-NEXT: v_mov_b32_e32 v2, 0x800000
-; GFX6-NEXT: v_cmp_lt_f32_e64 vcc, |v0|, v2
-; GFX6-NEXT: v_cndmask_b32_e64 v2, 0, 1, vcc
+; GFX6-NEXT: v_cmp_ge_f32_e64 s[4:5], |v0|, v2
+; GFX6-NEXT: v_cndmask_b32_e64 v2, 1, 0, s[4:5]
 ; GFX6-NEXT: v_lshlrev_b32_e32 v2, 5, v2
 ; GFX6-NEXT: v_ldexp_f32_e64 v0, |v0|, v2
 ; GFX6-NEXT: v_log_f32_e32 v0, v0
 ; GFX6-NEXT: v_mov_b32_e32 v2, 0x42000000
-; GFX6-NEXT: v_cndmask_b32_e32 v2, 0, v2, vcc
+; GFX6-NEXT: v_cndmask_b32_e64 v2, v2, 0, s[4:5]
 ; GFX6-NEXT: v_sub_f32_e32 v0, v0, v2
 ; GFX6-NEXT: v_mul_legacy_f32_e32 v0, v0, v1
 ; GFX6-NEXT: v_mov_b32_e32 v1, 0xc2fc0000
@@ -1028,13 +1028,13 @@ define float @v_pow_f32_fabs_lhs(float %x, float %y) {
 ; GFX8: ; %bb.0:
 ; GFX8-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
 ; GFX8-NEXT: v_mov_b32_e32 v2, 0x800000
-; GFX8-NEXT: v_cmp_lt_f32_e64 vcc, |v0|, v2
-; GFX8-NEXT: v_cndmask_b32_e64 v2, 0, 1, vcc
+; GFX8-NEXT: v_cmp_ge_f32_e64 s[4:5], |v0|, v2
+; GFX8-NEXT: v_cndmask_b32_e64 v2, 1, 0, s[4:5]
 ; GFX8-NEXT: v_lshlrev_b32_e32 v2, 5, v2
 ; GFX8-NEXT: v_ldexp_f32 v0, |v0|, v2
 ; GFX8-NEXT: v_log_f32_e32 v0, v0
 ; GFX8-NEXT: v_mov_b32_e32 v2, 0x42000000
-; GFX8-NEXT: v_cndmask_b32_e32 v2, 0, v2, vcc
+; GFX8-NEXT: v_cndmask_b32_e64 v2, v2, 0, s[4:5]
 ; GFX8-NEXT: v_sub_f32_e32 v0, v0, v2
 ; GFX8-NEXT: v_mul_legacy_f32_e32 v0, v0, v1
 ; GFX8-NEXT: v_mov_b32_e32 v1, 0xc2fc0000
@@ -1052,13 +1052,13 @@ define float @v_pow_f32_fabs_lhs(float %x, float %y) {
 ; GFX9: ; %bb.0:
 ; GFX9-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
 ; GFX9-NEXT: v_mov_b32_e32 v2, 0x800000
-; GFX9-NEXT: v_cmp_lt_f32_e64 vcc, |v0|, v2
-; GFX9-NEXT: v_cndmask_b32_e64 v2, 0, 1, vcc
+; GFX9-NEXT: v_cmp_ge_f32_e64 s[4:5], |v0|, v2
+; GFX9-NEXT: v_cndmask_b32_e64 v2, 1, 0, s[4:5]
 ; GFX9-NEXT: v_lshlrev_b32_e32 v2, 5, v2
 ; GFX9-NEXT: v_ldexp_f32 v0, |v0|, v2
 ; GFX9-NEXT: v_log_f32_e32 v0, v0
 ; GFX9-NEXT: v_mov_b32_e32 v2, 0x42000000
-; GFX9-NEXT: v_cndmask_b32_e32 v2, 0, v2, vcc
+; GFX9-NEXT: v_cndmask_b32_e64 v2, v2, 0, s[4:5]
 ; GFX9-NEXT: v_sub_f32_e32 v0, v0, v2
 ; GFX9-NEXT: v_mul_legacy_f32_e32 v0, v0, v1
 ; GFX9-NEXT: v_mov_b32_e32 v1, 0xc2fc0000
@@ -1075,11 +1075,11 @@ define float @v_pow_f32_fabs_lhs(float %x, float %y) {
 ; GFX10-LABEL: v_pow_f32_fabs_lhs:
 ; GFX10: ; %bb.0:
 ; GFX10-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; GFX10-NEXT: v_cmp_gt_f32_e64 s4, 0x800000, |v0|
-; GFX10-NEXT: v_cndmask_b32_e64 v2, 0, 1, s4
+; GFX10-NEXT: v_cmp_le_f32_e64 s4, 0x800000, |v0|
+; GFX10-NEXT: v_cndmask_b32_e64 v2, 1, 0, s4
 ; GFX10-NEXT: v_lshlrev_b32_e32 v2, 5, v2
 ; GFX10-NEXT: v_ldexp_f32 v0, |v0|, v2
-; GFX10-NEXT: v_cndmask_b32_e64 v2, 0, 0x42000000, s4
+; GFX10-NEXT: v_cndmask_b32_e64 v2, 0x42000000, 0, s4
 ; GFX10-NEXT: v_log_f32_e32 v0, v0
 ; GFX10-NEXT: v_sub_f32_e32 v0, v0, v2
 ; GFX10-NEXT: v_mul_legacy_f32_e32 v0, v0, v1
@@ -1094,13 +1094,13 @@ define float @v_pow_f32_fabs_lhs(float %x, float %y) {
 ; GFX11-LABEL: v_pow_f32_fabs_lhs:
 ; GFX11: ; %bb.0:
 ; GFX11-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; GFX11-NEXT: v_cmp_gt_f32_e64 s0, 0x800000, |v0|
+; GFX11-NEXT: v_cmp_le_f32_e64 s0, 0x800000, |v0|
 ; GFX11-NEXT: s_delay_alu instid0(VALU_DEP_1) | instskip(NEXT) | instid1(VALU_DEP_1)
-; GFX11-NEXT: v_cndmask_b32_e64 v2, 0, 1, s0
+; GFX11-NEXT: v_cndmask_b32_e64 v2, 1, 0, s0
 ; GFX11-NEXT: v_lshlrev_b32_e32 v2, 5, v2
 ; GFX11-NEXT: s_delay_alu instid0(VALU_DEP_1) | instskip(SKIP_1) | instid1(VALU_DEP_2)
 ; GFX11-NEXT: v_ldexp_f32 v0, |v0|, v2
-; GFX11-NEXT: v_cndmask_b32_e64 v2, 0, 0x42000000, s0
+; GFX11-NEXT: v_cndmask_b32_e64 v2, 0x42000000, 0, s0
 ; GFX11-NEXT: v_log_f32_e32 v0, v0
 ; GFX11-NEXT: s_waitcnt_depctr 0xfff
 ; GFX11-NEXT: v_sub_f32_e32 v0, v0, v2
@@ -1125,13 +1125,13 @@ define float @v_pow_f32_fabs_rhs(float %x, float %y) {
 ; GFX6: ; %bb.0:
 ; GFX6-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
 ; GFX6-NEXT: v_mov_b32_e32 v2, 0x800000
-; GFX6-NEXT: v_cmp_lt_f32_e32 vcc, v0, v2
-; GFX6-NEXT: v_cndmask_b32_e64 v2, 0, 1, vcc
+; GFX6-NEXT: v_cmp_ge_f32_e32 vcc, v0, v2
+; GFX6-NEXT: v_cndmask_b32_e64 v2, 1, 0, vcc
 ; GFX6-NEXT: v_lshlrev_b32_e32 v2, 5, v2
 ; GFX6-NEXT: v_ldexp_f32_e32 v0, v0, v2
 ; GFX6-NEXT: v_log_f32_e32 v0, v0
 ; GFX6-NEXT: v_mov_b32_e32 v2, 0x42000000
-; GFX6-NEXT: v_cndmask_b32_e32 v2, 0, v2, vcc
+; GFX6-NEXT: v_cndmask_b32_e64 v2, v2, 0, vcc
 ; GFX6-NEXT: v_sub_f32_e32 v0, v0, v2
 ; GFX6-NEXT: v_mul_legacy_f32_e64 v0, v0, |v1|
 ; GFX6-NEXT: v_mov_b32_e32 v1, 0xc2fc0000
@@ -1149,13 +1149,13 @@ define float @v_pow_f32_fabs_rhs(float %x, float %y) {
 ; GFX8: ; %bb.0:
 ; GFX8-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
 ; GFX8-NEXT: v_mov_b32_e32 v2, 0x800000
-; GFX8-NEXT: v_cmp_lt_f32_e32 vcc, v0, v2
-; GFX8-NEXT: v_cndmask_b32_e64 v2, 0, 1, vcc
+; GFX8-NEXT: v_cmp_ge_f32_e32 vcc, v0, v2
+; GFX8-NEXT: v_cndmask_b32_e64 v2, 1, 0, vcc
 ; GFX8-NEXT: v_lshlrev_b32_e32 v2, 5, v2
 ; GFX8-NEXT: v_ldexp_f32 v0, v0, v2
 ; GFX8-NEXT: v_log_f32_e32 v0, v0
 ; GFX8-NEXT: v_mov_b32_e32 v2, 0x42000000
-; GFX8-NEXT: v_cndmask_b32_e32 v2, 0, v2, vcc
+; GFX8-NEXT: v_cndmask_b32_e64 v2, v2, 0, vcc
 ; GFX8-NEXT: v_sub_f32_e32 v0, v0, v2
 ; GFX8-NEXT: v_mul_legacy_f32_e64 v0, v0, |v1|
 ; GFX8-NEXT: v_mov_b32_e32 v1, 0xc2fc0000
@@ -1173,13 +1173,13 @@ define float @v_pow_f32_fabs_rhs(float %x, float %y) {
 ; GFX9: ; %bb.0:
 ; GFX9-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
 ; GFX9-NEXT: v_mov_b32_e32 v2, 0x800000
-; GFX9-NEXT: v_cmp_lt_f32_e32 vcc, v0, v2
-; GFX9-NEXT: v_cndmask_b32_e64 v2, 0, 1, vcc
+; GFX9-NEXT: v_cmp_ge_f32_e32 vcc, v0, v2
+; GFX9-NEXT: v_cndmask_b32_e64 v2, 1, 0, vcc
 ; GFX9-NEXT: v_lshlrev_b32_e32 v2, 5, v2
 ; GFX9-NEXT: v_ldexp_f32 v0, v0, v2
 ; GFX9-NEXT: v_log_f32_e32 v0, v0
 ; GFX9-NEXT: v_mov_b32_e32 v2, 0x42000000
-; GFX9-NEXT: v_cndmask_b32_e32 v2, 0, v2, vcc
+; GFX9-NEXT: v_cndmask_b32_e64 v2, v2, 0, vcc
 ; GFX9-NEXT: v_sub_f32_e32 v0, v0, v2
 ; GFX9-NEXT: v_mul_legacy_f32_e64 v0, v0, |v1|
 ; GFX9-NEXT: v_mov_b32_e32 v1, 0xc2fc0000
@@ -1196,11 +1196,11 @@ define float @v_pow_f32_fabs_rhs(float %x, float %y) {
 ; GFX10-LABEL: v_pow_f32_fabs_rhs:
 ; GFX10: ; %bb.0:
 ; GFX10-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; GFX10-NEXT: v_cmp_gt_f32_e32 vcc_lo, 0x800000, v0
-; GFX10-NEXT: v_cndmask_b32_e64 v2, 0, 1, vcc_lo
+; GFX10-NEXT: v_cmp_le_f32_e32 vcc_lo, 0x800000, v0
+; GFX10-NEXT: v_cndmask_b32_e64 v2, 1, 0, vcc_lo
 ; GFX10-NEXT: v_lshlrev_b32_e32 v2, 5, v2
 ; GFX10-NEXT: v_ldexp_f32 v0, v0, v2
-; GFX10-NEXT: v_cndmask_b32_e64 v2, 0, 0x42000000, vcc_lo
+; GFX10-NEXT: v_cndmask_b32_e64 v2, 0x42000000, 0, vcc_lo
 ; GFX10-NEXT: v_log_f32_e32 v0, v0
 ; GFX10-NEXT: v_sub_f32_e32 v0, v0, v2
 ; GFX10-NEXT: v_mul_legacy_f32_e64 v0, v0, |v1|
@@ -1215,12 +1215,12 @@ define float @v_pow_f32_fabs_rhs(float %x, float %y) {
 ; GFX11-LABEL: v_pow_f32_fabs_rhs:
 ; GFX11: ; %bb.0:
 ; GFX11-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; GFX11-NEXT: v_cmp_gt_f32_e32 vcc_lo, 0x800000, v0
-; GFX11-NEXT: v_cndmask_b32_e64 v2, 0, 1, vcc_lo
+; GFX11-NEXT: v_cmp_le_f32_e32 vcc_lo, 0x800000, v0
+; GFX11-NEXT: v_cndmask_b32_e64 v2, 1, 0, vcc_lo
 ; GFX11-NEXT: s_delay_alu instid0(VALU_DEP_1) | instskip(NEXT) | instid1(VALU_DEP_1)
 ; GFX11-NEXT: v_lshlrev_b32_e32 v2, 5, v2
 ; GFX11-NEXT: v_ldexp_f32 v0, v0, v2
-; GFX11-NEXT: v_cndmask_b32_e64 v2, 0, 0x42000000, vcc_lo
+; GFX11-NEXT: v_cndmask_b32_e64 v2, 0x42000000, 0, vcc_lo
 ; GFX11-NEXT: s_delay_alu instid0(VALU_DEP_2) | instskip(SKIP_2) | instid1(VALU_DEP_1)
 ; GFX11-NEXT: v_log_f32_e32 v0, v0
 ; GFX11-NEXT: s_waitcnt_depctr 0xfff
@@ -1246,13 +1246,13 @@ define float @v_pow_f32_fabs_lhs_rhs(float %x, float %y) {
 ; GFX6: ; %bb.0:
 ; GFX6-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
 ; GFX6-NEXT: v_mov_b32_e32 v2, 0x800000
-; GFX6-NEXT: v_cmp_lt_f32_e64 vcc, |v0|, v2
-; GFX6-NEXT: v_cndmask_b32_e64 v2, 0, 1, vcc
+; GFX6-NEXT: v_cmp_ge_f32_e64 s[4:5], |v0|, v2
+; GFX6-NEXT: v_cndmask_b32_e64 v2, 1, 0, s[4:5]
 ; GFX6-NEXT: v_lshlrev_b32_e32 v2, 5, v2
 ; GFX6-NEXT: v_ldexp_f32_e64 v0, |v0|, v2
 ; GFX6-NEXT: v_log_f32_e32 v0, v0
 ; GFX6-NEXT: v_mov_b32_e32 v2, 0x42000000
-; GFX6-NEXT: v_cndmask_b32_e32 v2, 0, v2, vcc
+; GFX6-NEXT: v_cndmask_b32_e64 v2, v2, 0, s[4:5]
 ; GFX6-NEXT: v_sub_f32_e32 v0, v0, v2
 ; GFX6-NEXT: v_mul_legacy_f32_e64 v0, v0, |v1|
 ; GFX6-NEXT: v_mov_b32_e32 v1, 0xc2fc0000
@@ -1270,13 +1270,13 @@ define float @v_pow_f32_fabs_lhs_rhs(float %x, float %y) {
 ; GFX8: ; %bb.0:
 ; GFX8-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
 ; GFX8-NEXT: v_mov_b32_e32 v2, 0x800000
-; GFX8-NEXT: v_cmp_lt_f32_e64 vcc, |v0|, v2
-; GFX8-NEXT: v_cndmask_b32_e64 v2, 0, 1, vcc
+; GFX8-NEXT: v_cmp_ge_f32_e64 s[4:5], |v0|, v2
+; GFX8-NEXT: v_cndmask_b32_e64 v2, 1, 0, s[4:5]
 ; GFX8-NEXT: v_lshlrev_b32_e32 v2, 5, v2
 ; GFX8-NEXT: v_ldexp_f32 v0, |v0|, v2
 ; GFX8-NEXT: v_log_f32_e32 v0, v0
 ; GFX8-NEXT: v_mov_b32_e32 v2, 0x42000000
-; GFX8-NEXT: v_cndmask_b32_e32 v2, 0, v2, vcc
+; GFX8-NEXT: v_cndmask_b32_e64 v2, v2, 0, s[4:5]
 ; GFX8-NEXT: v_sub_f32_e32 v0, v0, v2
 ; GFX8-NEXT: v_mul_legacy_f32_e64 v0, v0, |v1|
 ; GFX8-NEXT: v_mov_b32_e32 v1, 0xc2fc0000
@@ -1294,13 +1294,13 @@ define float @v_pow_f32_fabs_lhs_rhs(float %x, float %y) {
 ; GFX9: ; %bb.0:
 ; GFX9-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
 ; GFX9-NEXT: v_mov_b32_e32 v2, 0x800000
-; GFX9-NEXT: v_cmp_lt_f32_e64 vcc, |v0|, v2
-; GFX9-NEXT: v_cndmask_b32_e64 v2, 0, 1, vcc
+; GFX9-NEXT: v_cmp_ge_f32_e64 s[4:5], |v0|, v2
+; GFX9-NEXT: v_cndmask_b32_e64 v2, 1, 0, s[4:5]
 ; GFX9-NEXT: v_lshlrev_b32_e32 v2, 5, v2
 ; GFX9-NEXT: v_ldexp_f32 v0, |v0|, v2
 ; GFX9-NEXT: v_log_f32_e32 v0, v0
 ; GFX9-NEXT: v_mov_b32_e32 v2, 0x42000000
-; GFX9-NEXT: v_cndmask_b32_e32 v2, 0, v2, vcc
+; GFX9-NEXT: v_cndmask_b32_e64 v2, v2, 0, s[4:5]
 ; GFX9-NEXT: v_sub_f32_e32 v0, v0, v2
 ; GFX9-NEXT: v_mul_legacy_f32_e64 v0, v0, |v1|
 ; GFX9-NEXT: v_mov_b32_e32 v1, 0xc2fc0000
@@ -1317,11 +1317,11 @@ define float @v_pow_f32_fabs_lhs_rhs(float %x, float %y) {
 ; GFX10-LABEL: v_pow_f32_fabs_lhs_rhs:
 ; GFX10: ; %bb.0:
 ; GFX10-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; GFX10-NEXT: v_cmp_gt_f32_e64 s4, 0x800000, |v0|
-; GFX10-NEXT: v_cndmask_b32_e64 v2, 0, 1, s4
+; GFX10-NEXT: v_cmp_le_f32_e64 s4, 0x800000, |v0|
+; GFX10-NEXT: v_cndmask_b32_e64 v2, 1, 0, s4
 ; GFX10-NEXT: v_lshlrev_b32_e32 v2, 5, v2
 ; GFX10-NEXT: v_ldexp_f32 v0, |v0|, v2
-; GFX10-NEXT: v_cndmask_b32_e64 v2, 0, 0x42000000, s4
+; GFX10-NEXT: v_cndmask_b32_e64 v2, 0x42000000, 0, s4
 ; GFX10-NEXT: v_log_f32_e32 v0, v0
 ; GFX10-NEXT: v_sub_f32_e32 v0, v0, v2
 ; GFX10-NEXT: v_mul_legacy_f32_e64 v0, v0, |v1|
@@ -1336,13 +1336,13 @@ define float @v_pow_f32_fabs_lhs_rhs(float %x, float %y) {
 ; GFX11-LABEL: v_pow_f32_fabs_lhs_rhs:
 ; GFX11: ; %bb.0:
 ; GFX11-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; GFX11-NEXT: v_cmp_gt_f32_e64 s0, 0x800000, |v0|
+; GFX11-NEXT: v_cmp_le_f32_e64 s0, 0x800000, |v0|
 ; GFX11-NEXT: s_delay_alu instid0(VALU_DEP_1) | instskip(NEXT) | instid1(VALU_DEP_1)
-; GFX11-NEXT: v_cndmask_b32_e64 v2, 0, 1, s0
+; GFX11-NEXT: v_cndmask_b32_e64 v2, 1, 0, s0
 ; GFX11-NEXT: v_lshlrev_b32_e32 v2, 5, v2
 ; GFX11-NEXT: s_delay_alu instid0(VALU_DEP_1) | instskip(SKIP_1) | instid1(VALU_DEP_2)
 ; GFX11-NEXT: v_ldexp_f32 v0, |v0|, v2
-; GFX11-NEXT: v_cndmask_b32_e64 v2, 0, 0x42000000, s0
+; GFX11-NEXT: v_cndmask_b32_e64 v2, 0x42000000, 0, s0
 ; GFX11-NEXT: v_log_f32_e32 v0, v0
 ; GFX11-NEXT: s_waitcnt_depctr 0xfff
 ; GFX11-NEXT: v_sub_f32_e32 v0, v0, v2
@@ -1367,13 +1367,13 @@ define amdgpu_ps float @v_pow_f32_sgpr_vgpr(float inreg %x, float %y) {
 ; GFX6-LABEL: v_pow_f32_sgpr_vgpr:
 ; GFX6: ; %bb.0:
 ; GFX6-NEXT: v_mov_b32_e32 v1, 0x800000
-; GFX6-NEXT: v_cmp_lt_f32_e32 vcc, s0, v1
-; GFX6-NEXT: v_cndmask_b32_e64 v1, 0, 1, vcc
+; GFX6-NEXT: v_cmp_ge_f32_e32 vcc, s0, v1
+; GFX6-NEXT: v_cndmask_b32_e64 v1, 1, 0, vcc
 ; GFX6-NEXT: v_lshlrev_b32_e32 v1, 5, v1
 ; GFX6-NEXT: v_ldexp_f32_e32 v1, s0, v1
 ; GFX6-NEXT: v_log_f32_e32 v1, v1
 ; GFX6-NEXT: v_mov_b32_e32 v2, 0x42000000
-; GFX6-NEXT: v_cndmask_b32_e32 v2, 0, v2, vcc
+; GFX6-NEXT: v_cndmask_b32_e64 v2, v2, 0, vcc
 ; GFX6-NEXT: v_sub_f32_e32 v1, v1, v2
 ; GFX6-NEXT: v_mul_legacy_f32_e32 v0, v1, v0
 ; GFX6-NEXT: v_mov_b32_e32 v1, 0xc2fc0000
@@ -1390,13 +1390,13 @@ define amdgpu_ps float @v_pow_f32_sgpr_vgpr(float inreg %x, float %y) {
 ; GFX8-LABEL: v_pow_f32_sgpr_vgpr:
 ; GFX8: ; %bb.0:
 ; GFX8-NEXT: v_mov_b32_e32 v1, 0x800000
-; GFX8-NEXT: v_cmp_lt_f32_e32 vcc, s0, v1
-; GFX8-NEXT: v_cndmask_b32_e64 v1, 0, 1, vcc
+; GFX8-NEXT: v_cmp_ge_f32_e32 vcc, s0, v1
+; GFX8-NEXT: v_cndmask_b32_e64 v1, 1, 0, vcc
 ; GFX8-NEXT: v_lshlrev_b32_e32 v1, 5, v1
 ; GFX8-NEXT: v_ldexp_f32 v1, s0, v1
 ; GFX8-NEXT: v_log_f32_e32 v1, v1
 ; GFX8-NEXT: v_mov_b32_e32 v2, 0x42000000
-; GFX8-NEXT: v_cndmask_b32_e32 v2, 0, v2, vcc
+; GFX8-NEXT: v_cndmask_b32_e64 v2, v2, 0, vcc
 ; GFX8-NEXT: v_sub_f32_e32 v1, v1, v2
 ; GFX8-NEXT: v_mul_legacy_f32_e32 v0, v1, v0
 ; GFX8-NEXT: v_mov_b32_e32 v1, 0xc2fc0000
@@ -1413,13 +1413,13 @@ define amdgpu_ps float @v_pow_f32_sgpr_vgpr(float inreg %x, float %y) {
 ; GFX9-LABEL: v_pow_f32_sgpr_vgpr:
 ; GFX9: ; %bb.0:
 ; GFX9-NEXT: v_mov_b32_e32 v1, 0x800000
-; GFX9-NEXT: v_cmp_lt_f32_e32 vcc, s0, v1
-; GFX9-NEXT: v_cndmask_b32_e64 v1, 0, 1, vcc
+; GFX9-NEXT: v_cmp_ge_f32_e32 vcc, s0, v1
+; GFX9-NEXT: v_cndmask_b32_e64 v1, 1, 0, vcc
 ; GFX9-NEXT: v_lshlrev_b32_e32 v1, 5, v1
 ; GFX9-NEXT: v_ldexp_f32 v1, s0, v1
 ; GFX9-NEXT: v_log_f32_e32 v1, v1
 ; GFX9-NEXT: v_mov_b32_e32 v2, 0x42000000
-; GFX9-NEXT: v_cndmask_b32_e32 v2, 0, v2, vcc
+; GFX9-NEXT: v_cndmask_b32_e64 v2, v2, 0, vcc
 ; GFX9-NEXT: v_sub_f32_e32 v1, v1, v2
 ; GFX9-NEXT: v_mul_legacy_f32_e32 v0, v1, v0
 ; GFX9-NEXT: v_mov_b32_e32 v1, 0xc2fc0000
@@ -1435,9 +1435,9 @@ define amdgpu_ps float @v_pow_f32_sgpr_vgpr(float inreg %x, float %y) {
 ;
 ; GFX10-LABEL: v_pow_f32_sgpr_vgpr:
 ; GFX10: ; %bb.0:
-; GFX10-NEXT: v_cmp_gt_f32_e64 s1, 0x800000, s0
-; GFX10-NEXT: v_cndmask_b32_e64 v1, 0, 1, s1
-; GFX10-NEXT: v_cndmask_b32_e64 v2, 0, 0x42000000, s1
+; GFX10-NEXT: v_cmp_le_f32_e64 s1, 0x800000, s0
+; GFX10-NEXT: v_cndmask_b32_e64 v1, 1, 0, s1
+; GFX10-NEXT: v_cndmask_b32_e64 v2, 0x42000000, 0, s1
 ; GFX10-NEXT: v_lshlrev_b32_e32 v1, 5, v1
 ; GFX10-NEXT: v_ldexp_f32 v1, s0, v1
 ; GFX10-NEXT: v_log_f32_e32 v1, v1
@@ -1453,10 +1453,10 @@ define amdgpu_ps float @v_pow_f32_sgpr_vgpr(float inreg %x, float %y) {
 ;
 ; GFX11-LABEL: v_pow_f32_sgpr_vgpr:
 ; GFX11: ; %bb.0:
-; GFX11-NEXT: v_cmp_gt_f32_e64 s1, 0x800000, s0
+; GFX11-NEXT: v_cmp_le_f32_e64 s1, 0x800000, s0
 ; GFX11-NEXT: s_delay_alu instid0(VALU_DEP_1) | instskip(SKIP_1) | instid1(VALU_DEP_2)
-; GFX11-NEXT: v_cndmask_b32_e64 v1, 0, 1, s1
-; GFX11-NEXT: v_cndmask_b32_e64 v2, 0, 0x42000000, s1
+; GFX11-NEXT: v_cndmask_b32_e64 v1, 1, 0, s1
+; GFX11-NEXT: v_cndmask_b32_e64 v2, 0x42000000, 0, s1
 ; GFX11-NEXT: v_lshlrev_b32_e32 v1, 5, v1
 ; GFX11-NEXT: s_delay_alu instid0(VALU_DEP_1) | instskip(NEXT) | instid1(VALU_DEP_1)
 ; GFX11-NEXT: v_ldexp_f32 v1, s0, v1
@@ -1482,13 +1482,13 @@ define amdgpu_ps float @v_pow_f32_vgpr_sgpr(float %x, float inreg %y) {
 ; GFX6-LABEL: v_pow_f32_vgpr_sgpr:
 ; GFX6: ; %bb.0:
 ; GFX6-NEXT: v_mov_b32_e32 v1, 0x800000
-; GFX6-NEXT: v_cmp_lt_f32_e32 vcc, v0, v1
-; GFX6-NEXT: v_cndmask_b32_e64 v1, 0, 1, vcc
+; GFX6-NEXT: v_cmp_ge_f32_e32 vcc, v0, v1
+; GFX6-NEXT: v_cndmask_b32_e64 v1, 1, 0, vcc
 ; GFX6-NEXT: v_lshlrev_b32_e32 v1, 5, v1
 ; GFX6-NEXT: v_ldexp_f32_e32 v0, v0, v1
 ; GFX6-NEXT: v_log_f32_e32 v0, v0
 ; GFX6-NEXT: v_mov_b32_e32 v1, 0x42000000
-; GFX6-NEXT: v_cndmask_b32_e32 v1, 0, v1, vcc
+; GFX6-NEXT: v_cndmask_b32_e64 v1, v1, 0, vcc
 ; GFX6-NEXT: v_mov_b32_e32 v2, 0x42800000
 ; GFX6-NEXT: v_sub_f32_e32 v0, v0, v1
 ; GFX6-NEXT: v_mul_legacy_f32_e32 v0, s0, v0
@@ -1505,13 +1505,13 @@ define amdgpu_ps float @v_pow_f32_vgpr_sgpr(float %x, float inreg %y) {
 ; GFX8-LABEL: v_pow_f32_vgpr_sgpr:
 ; GFX8: ; %bb.0:
 ; GFX8-NEXT: v_mov_b32_e32 v1, 0x800000
-; GFX8-NEXT: v_cmp_lt_f32_e32 vcc, v0, v1
-; GFX8-NEXT: v_cndmask_b32_e64 v1, 0, 1, vcc
+; GFX8-NEXT: v_cmp_ge_f32_e32 vcc, v0, v1
+; GFX8-NEXT: v_cndmask_b32_e64 v1, 1, 0, vcc
 ; GFX8-NEXT: v_lshlrev_b32_e32 v1, 5, v1
 ; GFX8-NEXT: v_ldexp_f32 v0, v0, v1
 ; GFX8-NEXT: v_log_f32_e32 v0, v0
 ; GFX8-NEXT: v_mov_b32_e32 v1, 0x42000000
-; GFX8-NEXT: v_cndmask_b32_e32 v1, 0, v1, vcc
+; GFX8-NEXT: v_cndmask_b32_e64 v1, v1, 0, vcc
 ; GFX8-NEXT: v_mov_b32_e32 v2, 0x42800000
 ; GFX8-NEXT: v_sub_f32_e32 v0, v0, v1
 ; GFX8-NEXT: v_mul_legacy_f32_e32 v0, s0, v0
@@ -1528,13 +1528,13 @@ define amdgpu_ps float @v_pow_f32_vgpr_sgpr(float %x, float inreg %y) {
 ; GFX9-LABEL: v_pow_f32_vgpr_sgpr:
 ; GFX9: ; %bb.0:
 ; GFX9-NEXT: v_mov_b32_e32 v1, 0x800000
-; GFX9-NEXT: v_cmp_lt_f32_e32 vcc, v0, v1
-; GFX9-NEXT: v_cndmask_b32_e64 v1, 0, 1, vcc
+; GFX9-NEXT: v_cmp_ge_f32_e32 vcc, v0, v1
+; GFX9-NEXT: v_cndmask_b32_e64 v1, 1, 0, vcc
 ; GFX9-NEXT: v_lshlrev_b32_e32 v1, 5, v1
 ; GFX9-NEXT: v_ldexp_f32 v0, v0, v1
 ; GFX9-NEXT: v_log_f32_e32 v0, v0
 ; GFX9-NEXT: v_mov_b32_e32 v1, 0x42000000
-; GFX9-NEXT: v_cndmask_b32_e32 v1, 0, v1, vcc
+; GFX9-NEXT: v_cndmask_b32_e64 v1, v1, 0, vcc
 ; GFX9-NEXT: v_mov_b32_e32 v2, 0x42800000
 ; GFX9-NEXT: v_sub_f32_e32 v0, v0, v1
 ; GFX9-NEXT: v_mul_legacy_f32_e32 v0, s0, v0
@@ -1550,11 +1550,11 @@ define amdgpu_ps float @v_pow_f32_vgpr_sgpr(float %x, float inreg %y) {
 ;
 ; GFX10-LABEL: v_pow_f32_vgpr_sgpr:
 ; GFX10: ; %bb.0:
-; GFX10-NEXT: v_cmp_gt_f32_e32 vcc_lo, 0x800000, v0
-; GFX10-NEXT: v_cndmask_b32_e64 v1, 0, 1, vcc_lo
+; GFX10-NEXT: v_cmp_le_f32_e32 vcc_lo, 0x800000, v0
+; GFX10-NEXT: v_cndmask_b32_e64 v1, 1, 0, vcc_lo
 ; GFX10-NEXT: v_lshlrev_b32_e32 v1, 5, v1
 ; GFX10-NEXT: v_ldexp_f32 v0, v0, v1
-; GFX10-NEXT: v_cndmask_b32_e64 v1, 0, 0x42000000, vcc_lo
+; GFX10-NEXT: v_cndmask_b32_e64 v1, 0x42000000, 0, vcc_lo
 ; GFX10-NEXT: v_log_f32_e32 v0, v0
 ; GFX10-NEXT: v_sub_f32_e32 v0, v0, v1
 ; GFX10-NEXT: v_mul_legacy_f32_e32 v0, s0, v0
@@ -1568,12 +1568,12 @@ define amdgpu_ps float @v_pow_f32_vgpr_sgpr(float %x, float inreg %y) {
 ;
 ; GFX11-LABEL: v_pow_f32_vgpr_sgpr:
 ; GFX11: ; %bb.0:
-; GFX11-NEXT: v_cmp_gt_f32_e32 vcc_lo, 0x800000, v0
-; GFX11-NEXT: v_cndmask_b32_e64 v1, 0, 1, vcc_lo
+; GFX11-NEXT: v_cmp_le_f32_e32 vcc_lo, 0x800000, v0
+; GFX11-NEXT: v_cndmask_b32_e64 v1, 1, 0, vcc_lo
 ; GFX11-NEXT: s_delay_alu instid0(VALU_DEP_1) | instskip(NEXT) | instid1(VALU_DEP_1)
 ; GFX11-NEXT: v_lshlrev_b32_e32 v1, 5, v1
 ; GFX11-NEXT: v_ldexp_f32 v0, v0, v1
-; GFX11-NEXT: v_cndmask_b32_e64 v1, 0, 0x42000000, vcc_lo
+; GFX11-NEXT: v_cndmask_b32_e64 v1, 0x42000000, 0, vcc_lo
 ; GFX11-NEXT: s_delay_alu instid0(VALU_DEP_2) | instskip(SKIP_2) | instid1(VALU_DEP_1)
 ; GFX11-NEXT: v_log_f32_e32 v0, v0
 ; GFX11-NEXT: s_waitcnt_depctr 0xfff
@@ -1597,13 +1597,13 @@ define amdgpu_ps float @v_pow_f32_sgpr_sgpr(float inreg %x, float inreg %y) {
 ; GFX6-LABEL: v_pow_f32_sgpr_sgpr:
 ; GFX6: ; %bb.0:
 ; GFX6-NEXT: v_mov_b32_e32 v0, 0x800000
-; GFX6-NEXT: v_cmp_lt_f32_e32 vcc, s0, v0
-; GFX6-NEXT: v_cndmask_b32_e64 v0, 0, 1, vcc
+; GFX6-NEXT: v_cmp_ge_f32_e32 vcc, s0, v0
+; GFX6-NEXT: v_cndmask_b32_e64 v0, 1, 0, vcc
 ; GFX6-NEXT: v_lshlrev_b32_e32 v0, 5, v0
 ; GFX6-NEXT: v_ldexp_f32_e32 v0, s0, v0
 ; GFX6-NEXT: v_log_f32_e32 v0, v0
 ; GFX6-NEXT: v_mov_b32_e32 v1, 0x42000000
-; GFX6-NEXT: v_cndmask_b32_e32 v1, 0, v1, vcc
+; GFX6-NEXT: v_cndmask_b32_e64 v1, v1, 0, vcc
 ; GFX6-NEXT: v_mov_b32_e32 v2, 0x42800000
 ; GFX6-NEXT: v_sub_f32_e32 v0, v0, v1
 ; GFX6-NEXT: v_mul_legacy_f32_e32 v0, s1, v0
@@ -1620,13 +1620,13 @@ define amdgpu_ps float @v_pow_f32_sgpr_sgpr(float inreg %x, float inreg %y) {
 ; GFX8-LABEL: v_pow_f32_sgpr_sgpr:
 ; GFX8: ; %bb.0:
 ; GFX8-NEXT: v_mov_b32_e32 v0, 0x800000
-; GFX8-NEXT: v_cmp_lt_f32_e32 vcc, s0, v0
-; GFX8-NEXT: v_cndmask_b32_e64 v0, 0, 1, vcc
+; GFX8-NEXT: v_cmp_ge_f32_e32 vcc, s0, v0
+; GFX8-NEXT: v_cndmask_b32_e64 v0, 1, 0, vcc
 ; GFX8-NEXT: v_lshlrev_b32_e32 v0, 5, v0
 ; GFX8-NEXT: v_ldexp_f32 v0, s0, v0
 ; GFX8-NEXT: v_log_f32_e32 v0, v0
 ; GFX8-NEXT: v_mov_b32_e32 v1, 0x42000000
-; GFX8-NEXT: v_cndmask_b32_e32 v1, 0, v1, vcc
+; GFX8-NEXT: v_cndmask_b32_e64 v1, v1, 0, vcc
 ; GFX8-NEXT: v_mov_b32_e32 v2, 0x42800000
 ; GFX8-NEXT: v_sub_f32_e32 v0, v0, v1
 ; GFX8-NEXT: v_mul_legacy_f32_e32 v0, s1, v0
@@ -1643,13 +1643,13 @@ define amdgpu_ps float @v_pow_f32_sgpr_sgpr(float inreg %x, float inreg %y) {
 ; GFX9-LABEL: v_pow_f32_sgpr_sgpr:
 ; GFX9: ; %bb.0:
 ; GFX9-NEXT: v_mov_b32_e32 v0, 0x800000
-; GFX9-NEXT: v_cmp_lt_f32_e32 vcc, s0, v0
-; GFX9-NEXT: v_cndmask_b32_e64 v0, 0, 1, vcc
+; GFX9-NEXT: v_cmp_ge_f32_e32 vcc, s0, v0
+; GFX9-NEXT: v_cndmask_b32_e64 v0, 1, 0, vcc
 ; GFX9-NEXT: v_lshlrev_b32_e32 v0, 5, v0
 ; GFX9-NEXT: v_ldexp_f32 v0, s0, v0
 ; GFX9-NEXT: v_log_f32_e32 v0, v0
 ; GFX9-NEXT: v_mov_b32_e32 v1, 0x42000000
-; GFX9-NEXT: v_cndmask_b32_e32 v1, 0, v1, vcc
+; GFX9-NEXT: v_cndmask_b32_e64 v1, v1, 0, vcc
 ; GFX9-NEXT: v_mov_b32_e32 v2, 0x42800000
 ; GFX9-NEXT: v_sub_f32_e32 v0, v0, v1
 ; GFX9-NEXT: v_mul_legacy_f32_e32 v0, s1, v0
@@ -1665,9 +1665,9 @@ define amdgpu_ps float @v_pow_f32_sgpr_sgpr(float inreg %x, float inreg %y) {
 ;
 ; GFX10-LABEL: v_pow_f32_sgpr_sgpr:
 ; GFX10: ; %bb.0:
-; GFX10-NEXT: v_cmp_gt_f32_e64 s2, 0x800000, s0
-; GFX10-NEXT: v_cndmask_b32_e64 v0, 0, 1, s2
-; GFX10-NEXT: v_cndmask_b32_e64 v1, 0, 0x42000000, s2
+; GFX10-NEXT: v_cmp_le_f32_e64 s2, 0x800000, s0
+; GFX10-NEXT: v_cndmask_b32_e64 v0, 1, 0, s2
+; GFX10-NEXT: v_cndmask_b32_e64 v1, 0x42000000, 0, s2
 ; GFX10-NEXT: v_lshlrev_b32_e32 v0, 5, v0
 ; GFX10-NEXT: v_ldexp_f32 v0, s0, v0
 ; GFX10-NEXT: v_log_f32_e32 v0, v0
@@ -1683,10 +1683,10 @@ define amdgpu_ps float @v_pow_f32_sgpr_sgpr(float inreg %x, float inreg %y) {
 ;
 ; GFX11-LABEL: v_pow_f32_sgpr_sgpr:
 ; GFX11: ; %bb.0:
-; GFX11-NEXT: v_cmp_gt_f32_e64 s2, 0x800000, s0
+; GFX11-NEXT: v_cmp_le_f32_e64 s2, 0x800000, s0
 ; GFX11-NEXT: s_delay_alu instid0(VALU_DEP_1) | instskip(SKIP_1) | instid1(VALU_DEP_2)
-; GFX11-NEXT: v_cndmask_b32_e64 v0, 0, 1, s2
-; GFX11-NEXT: v_cndmask_b32_e64 v1, 0, 0x42000000, s2
+; GFX11-NEXT: v_cndmask_b32_e64 v0, 1, 0, s2
+; GFX11-NEXT: v_cndmask_b32_e64 v1, 0x42000000, 0, s2
 ; GFX11-NEXT: v_lshlrev_b32_e32 v0, 5, v0
 ; GFX11-NEXT: s_delay_alu instid0(VALU_DEP_1) | instskip(NEXT) | instid1(VALU_DEP_1)
 ; GFX11-NEXT: v_ldexp_f32 v0, s0, v0
@@ -1713,13 +1713,13 @@ define float @v_pow_f32_fneg_lhs(float %x, float %y) {
 ; GFX6: ; %bb.0:
 ; GFX6-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
 ; GFX6-NEXT: v_mov_b32_e32 v2, 0x800000
-; GFX6-NEXT: v_cmp_lt_f32_e64 vcc, -v0, v2
-; GFX6-NEXT: v_cndmask_b32_e64 v2, 0, 1, vcc
+; GFX6-NEXT: v_cmp_ge_f32_e64 s[4:5], -v0, v2
+; GFX6-NEXT: v_cndmask_b32_e64 v2, 1, 0, s[4:5]
 ; GFX6-NEXT: v_lshlrev_b32_e32 v2, 5, v2
 ; GFX6-NEXT: v_ldexp_f32_e64 v0, -v0, v2
 ; GFX6-NEXT: v_log_f32_e32 v0, v0
 ; GFX6-NEXT: v_mov_b32_e32 v2, 0x42000000
-; GFX6-NEXT: v_cndmask_b32_e32 v2, 0, v2, vcc
+; GFX6-NEXT: v_cndmask_b32_e64 v2, v2, 0, s[4:5]
 ; GFX6-NEXT: v_sub_f32_e32 v0, v0, v2
 ; GFX6-NEXT: v_mul_legacy_f32_e32 v0, v0, v1
 ; GFX6-NEXT: v_mov_b32_e32 v1, 0xc2fc0000
@@ -1737,13 +1737,13 @@ define float @v_pow_f32_fneg_lhs(float %x, float %y) {
 ; GFX8: ; %bb.0:
 ; GFX8-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
 ; GFX8-NEXT: v_mov_b32_e32 v2, 0x800000
-; GFX8-NEXT: v_cmp_lt_f32_e64 vcc, -v0, v2
-; GFX8-NEXT: v_cndmask_b32_e64 v2, 0, 1, vcc
+; GFX8-NEXT: v_cmp_ge_f32_e64 s[4:5], -v0, v2
+; GFX8-NEXT: v_cndmask_b32_e64 v2, 1, 0, s[4:5]
 ; GFX8-NEXT: v_lshlrev_b32_e32 v2, 5, v2
 ; GFX8-NEXT: v_ldexp_f32 v0, -v0, v2
 ; GFX8-NEXT: v_log_f32_e32 v0, v0
 ; GFX8-NEXT: v_mov_b32_e32 v2, 0x42000000
-; GFX8-NEXT: v_cndmask_b32_e32 v2, 0, v2, vcc
+; GFX8-NEXT: v_cndmask_b32_e64 v2, v2, 0, s[4:5]
 ; GFX8-NEXT: v_sub_f32_e32 v0, v0, v2
 ; GFX8-NEXT: v_mul_legacy_f32_e32 v0, v0, v1
 ; GFX8-NEXT: v_mov_b32_e32 v1, 0xc2fc0000
@@ -1761,13 +1761,13 @@ define float @v_pow_f32_fneg_lhs(float %x, float %y) {
 ; GFX9: ; %bb.0:
 ; GFX9-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
 ; GFX9-NEXT: v_mov_b32_e32 v2, 0x800000
-; GFX9-NEXT: v_cmp_lt_f32_e64 vcc, -v0, v2
-; GFX9-NEXT: v_cndmask_b32_e64 v2, 0, 1, vcc
+; GFX9-NEXT: v_cmp_ge_f32_e64 s[4:5], -v0, v2
+; GFX9-NEXT: v_cndmask_b32_e64 v2, 1, 0, s[4:5]
 ; GFX9-NEXT: v_lshlrev_b32_e32 v2, 5, v2
 ; GFX9-NEXT: v_ldexp_f32 v0, -v0, v2
 ; GFX9-NEXT: v_log_f32_e32 v0, v0
 ; GFX9-NEXT: v_mov_b32_e32 v2, 0x42000000
-; GFX9-NEXT: v_cndmask_b32_e32 v2, 0, v2, vcc
+; GFX9-NEXT: v_cndmask_b32_e64 v2, v2, 0, s[4:5]
 ; GFX9-NEXT: v_sub_f32_e32 v0, v0, v2
 ; GFX9-NEXT: v_mul_legacy_f32_e32 v0, v0, v1
 ; GFX9-NEXT: v_mov_b32_e32 v1, 0xc2fc0000
@@ -1784,11 +1784,11 @@ define float @v_pow_f32_fneg_lhs(float %x, float %y) {
 ; GFX10-LABEL: v_pow_f32_fneg_lhs:
 ; GFX10: ; %bb.0:
 ; GFX10-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; GFX10-NEXT: v_cmp_gt_f32_e64 s4, 0x800000, -v0
-; GFX10-NEXT: v_cndmask_b32_e64 v2, 0, 1, s4
+; GFX10-NEXT: v_cmp_le_f32_e64 s4, 0x800000, -v0
+; GFX10-NEXT: v_cndmask_b32_e64 v2, 1, 0, s4
 ; GFX10-NEXT: v_lshlrev_b32_e32 v2, 5, v2
 ; GFX10-NEXT: v_ldexp_f32 v0, -v0, v2
-; GFX10-NEXT: v_cndmask_b32_e64 v2, 0, 0x42000000, s4
+; GFX10-NEXT: v_cndmask_b32_e64 v2, 0x42000000, 0, s4
 ; GFX10-NEXT: v_log_f32_e32 v0, v0
 ; GFX10-NEXT: v_sub_f32_e32 v0, v0, v2
 ; GFX10-NEXT: v_mul_legacy_f32_e32 v0, v0, v1
@@ -1803,13 +1803,13 @@ define float @v_pow_f32_fneg_lhs(float %x, float %y) {
 ; GFX11-LABEL: v_pow_f32_fneg_lhs:
 ; GFX11: ; %bb.0:
 ; GFX11-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; GFX11-NEXT: v_cmp_gt_f32_e64 s0, 0x800000, -v0
+; GFX11-NEXT: v_cmp_le_f32_e64 s0, 0x800000, -v0
 ; GFX11-NEXT: s_delay_alu instid0(VALU_DEP_1) | instskip(NEXT) | instid1(VALU_DEP_1)
-; GFX11-NEXT: v_cndmask_b32_e64 v2, 0, 1, s0
+; GFX11-NEXT: v_cndmask_b32_e64 v2, 1, 0, s0
 ; GFX11-NEXT: v_lshlrev_b32_e32 v2, 5, v2
 ; GFX11-NEXT: s_delay_alu instid0(VALU_DEP_1) | instskip(SKIP_1) | instid1(VALU_DEP_2)
 ; GFX11-NEXT: v_ldexp_f32 v0, -v0, v2
-; GFX11-NEXT: v_cndmask_b32_e64 v2, 0, 0x42000000, s0
+; GFX11-NEXT: v_cndmask_b32_e64 v2, 0x42000000, 0, s0
 ; GFX11-NEXT: v_log_f32_e32 v0, v0
 ; GFX11-NEXT: s_waitcnt_depctr 0xfff
 ; GFX11-NEXT: v_sub_f32_e32 v0, v0, v2
@@ -1834,13 +1834,13 @@ define float @v_pow_f32_fneg_rhs(float %x, float %y) {
 ; GFX6: ; %bb.0:
 ; GFX6-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
 ; GFX6-NEXT: v_mov_b32_e32 v2, 0x800000
-; GFX6-NEXT: v_cmp_lt_f32_e32 vcc, v0, v2
-; GFX6-NEXT: v_cndmask_b32_e64 v2, 0, 1, vcc
+; GFX6-NEXT: v_cmp_ge_f32_e32 vcc, v0, v2
+; GFX6-NEXT: v_cndmask_b32_e64 v2, 1, 0, vcc
 ; GFX6-NEXT: v_lshlrev_b32_e32 v2, 5, v2
 ; GFX6-NEXT: v_ldexp_f32_e32 v0, v0, v2
 ; GFX6-NEXT: v_log_f32_e32 v0, v0
 ; GFX6-NEXT: v_mov_b32_e32 v2, 0x42000000
-; GFX6-NEXT: v_cndmask_b32_e32 v2, 0, v2, vcc
+; GFX6-NEXT: v_cndmask_b32_e64 v2, v2, 0, vcc
 ; GFX6-NEXT: v_sub_f32_e32 v0, v0, v2
 ; GFX6-NEXT: v_mul_legacy_f32_e64 v0, v0, -v1
 ; GFX6-NEXT: v_mov_b32_e32 v1, 0xc2fc0000
@@ -1858,13 +1858,13 @@ define float @v_pow_f32_fneg_rhs(float %x, float %y) {
 ; GFX8: ; %bb.0:
 ; GFX8-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
 ; GFX8-NEXT: v_mov_b32_e32 v2, 0x800000
-; GFX8-NEXT: v_cmp_lt_f32_e32 vcc, v0, v2
-; GFX8-NEXT: v_cndmask_b32_e64 v2, 0, 1, vcc
+; GFX8-NEXT: v_cmp_ge_f32_e32 vcc, v0, v2
+; GFX8-NEXT: v_cndmask_b32_e64 v2, 1, 0, vcc
 ; GFX8-NEXT: v_lshlrev_b32_e32 v2, 5, v2
 ; GFX8-NEXT: v_ldexp_f32 v0, v0, v2
 ; GFX8-NEXT: v_log_f32_e32 v0, v0
 ; GFX8-NEXT: v_mov_b32_e32 v2, 0x42000000
-; GFX8-NEXT: v_cndmask_b32_e32 v2, 0, v2, vcc
+; GFX8-NEXT: v_cndmask_b32_e64 v2, v2, 0, vcc
 ; GFX8-NEXT: v_sub_f32_e32 v0, v0, v2
 ; GFX8-NEXT: v_mul_legacy_f32_e64 v0, v0, -v1
 ; GFX8-NEXT: v_mov_b32_e32 v1, 0xc2fc0000
@@ -1882,13 +1882,13 @@ define float @v_pow_f32_fneg_rhs(float %x, float %y) {
 ; GFX9: ; %bb.0:
 ; GFX9-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
 ; GFX9-NEXT: v_mov_b32_e32 v2, 0x800000
-; GFX9-NEXT: v_cmp_lt_f32_e32 vcc, v0, v2
-; GFX9-NEXT: v_cndmask_b32_e64 v2, 0, 1, vcc
+; GFX9-NEXT: v_cmp_ge_f32_e32 vcc, v0, v2
+; GFX9-NEXT: v_cndmask_b32_e64 v2, 1, 0, vcc
 ; GFX9-NEXT: v_lshlrev_b32_e32 v2, 5, v2
 ; GFX9-NEXT: v_ldexp_f32 v0, v0, v2
 ; GFX9-NEXT: v_log_f32_e32 v0, v0
 ; GFX9-NEXT: v_mov_b32_e32 v2, 0x42000000
-; GFX9-NEXT: v_cndmask_b32_e32 v2, 0, v2, vcc
+; GFX9-NEXT: v_cndmask_b32_e64 v2, v2, 0, vcc
 ; GFX9-NEXT: v_sub_f32_e32 v0, v0, v2
 ; GFX9-NEXT: v_mul_legacy_f32_e64 v0, v0, -v1
 ; GFX9-NEXT: v_mov_b32_e32 v1, 0xc2fc0000
@@ -1905,11 +1905,11 @@ define float @v_pow_f32_fneg_rhs(float %x, float %y) {
 ; GFX10-LABEL: v_pow_f32_fneg_rhs:
 ; GFX10: ; %bb.0:
 ; GFX10-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; GFX10-NEXT: v_cmp_gt_f32_e32 vcc_lo, 0x800000, v0
-; GFX10-NEXT: v_cndmask_b32_e64 v2, 0, 1, vcc_lo
+; GFX10-NEXT: v_cmp_le_f32_e32 vcc_lo, 0x800000, v0
+; GFX10-NEXT: v_cndmask_b32_e64 v2, 1, 0, vcc_lo
 ; GFX10-NEXT: v_lshlrev_b32_e32 v2, 5, v2
 ; GFX10-NEXT: v_ldexp_f32 v0, v0, v2
-; GFX10-NEXT: v_cndmask_b32_e64 v2, 0, 0x42000000, vcc_lo
+; GFX10-NEXT: v_cndmask_b32_e64 v2, 0x42000000, 0, vcc_lo
 ; GFX10-NEXT: v_log_f32_e32 v0, v0
 ; GFX10-NEXT: v_sub_f32_e32 v0, v0, v2
 ; GFX10-NEXT: v_mul_legacy_f32_e64 v0, v0, -v1
@@ -1924,12 +1924,12 @@ define float @v_pow_f32_fneg_rhs(float %x, float %y) {
 ; GFX11-LABEL: v_pow_f32_fneg_rhs:
 ; GFX11: ; %bb.0:
 ; GFX11-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; GFX11-NEXT: v_cmp_gt_f32_e32 vcc_lo, 0x800000, v0
-; GFX11-NEXT: v_cndmask_b32_e64 v2, 0, 1, vcc_lo
+; GFX11-NEXT: v_cmp_le_f32_e32 vcc_lo, 0x800000, v0
+; GFX11-NEXT: v_cndmask_b32_e64 v2, 1, 0, vcc_lo
 ; GFX11-NEXT: s_delay_alu instid0(VALU_DEP_1) | instskip(NEXT) | instid1(VALU_DEP_1)
 ; GFX11-NEXT: v_lshlrev_b32_e32 v2, 5, v2
 ; GFX11-NEXT: v_ldexp_f32 v0, v0, v2
-; GFX11-NEXT: v_cndmask_b32_e64 v2, 0, 0x42000000, vcc_lo
+; GFX11-NEXT: v_cndmask_b32_e64 v2, 0x42000000, 0, vcc_lo
 ; GFX11-NEXT: s_delay_alu instid0(VALU_DEP_2) | instskip(SKIP_2) | instid1(VALU_DEP_1)
 ; GFX11-NEXT: v_log_f32_e32 v0, v0
 ; GFX11-NEXT: s_waitcnt_depctr 0xfff
diff --git a/llvm/test/CodeGen/AMDGPU/GlobalISel/llvm.powi.ll b/llvm/test/CodeGen/AMDGPU/GlobalISel/llvm.powi.ll
index fe002d69faf66..01461a09b5c2a 100644
--- a/llvm/test/CodeGen/AMDGPU/GlobalISel/llvm.powi.ll
+++ b/llvm/test/CodeGen/AMDGPU/GlobalISel/llvm.powi.ll
@@ -79,14 +79,14 @@ define float @v_powi_f32(float %l, i32
%r) { ; GFX7: ; %bb.0: ; GFX7-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) ; GFX7-NEXT: v_mov_b32_e32 v2, 0x800000 -; GFX7-NEXT: v_cmp_lt_f32_e32 vcc, v0, v2 -; GFX7-NEXT: v_cndmask_b32_e64 v2, 0, 1, vcc +; GFX7-NEXT: v_cmp_ge_f32_e32 vcc, v0, v2 +; GFX7-NEXT: v_cndmask_b32_e64 v2, 1, 0, vcc ; GFX7-NEXT: v_lshlrev_b32_e32 v2, 5, v2 ; GFX7-NEXT: v_ldexp_f32_e32 v0, v0, v2 ; GFX7-NEXT: v_log_f32_e32 v0, v0 ; GFX7-NEXT: v_cvt_f32_i32_e32 v1, v1 ; GFX7-NEXT: v_mov_b32_e32 v2, 0x42000000 -; GFX7-NEXT: v_cndmask_b32_e32 v2, 0, v2, vcc +; GFX7-NEXT: v_cndmask_b32_e64 v2, v2, 0, vcc ; GFX7-NEXT: v_sub_f32_e32 v0, v0, v2 ; GFX7-NEXT: v_mul_legacy_f32_e32 v0, v0, v1 ; GFX7-NEXT: v_mov_b32_e32 v1, 0xc2fc0000 @@ -104,14 +104,14 @@ define float @v_powi_f32(float %l, i32 %r) { ; GFX8: ; %bb.0: ; GFX8-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) ; GFX8-NEXT: v_mov_b32_e32 v2, 0x800000 -; GFX8-NEXT: v_cmp_lt_f32_e32 vcc, v0, v2 -; GFX8-NEXT: v_cndmask_b32_e64 v2, 0, 1, vcc +; GFX8-NEXT: v_cmp_ge_f32_e32 vcc, v0, v2 +; GFX8-NEXT: v_cndmask_b32_e64 v2, 1, 0, vcc ; GFX8-NEXT: v_lshlrev_b32_e32 v2, 5, v2 ; GFX8-NEXT: v_ldexp_f32 v0, v0, v2 ; GFX8-NEXT: v_log_f32_e32 v0, v0 ; GFX8-NEXT: v_cvt_f32_i32_e32 v1, v1 ; GFX8-NEXT: v_mov_b32_e32 v2, 0x42000000 -; GFX8-NEXT: v_cndmask_b32_e32 v2, 0, v2, vcc +; GFX8-NEXT: v_cndmask_b32_e64 v2, v2, 0, vcc ; GFX8-NEXT: v_sub_f32_e32 v0, v0, v2 ; GFX8-NEXT: v_mul_legacy_f32_e32 v0, v0, v1 ; GFX8-NEXT: v_mov_b32_e32 v1, 0xc2fc0000 @@ -128,13 +128,13 @@ define float @v_powi_f32(float %l, i32 %r) { ; GFX11-LABEL: v_powi_f32: ; GFX11: ; %bb.0: ; GFX11-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX11-NEXT: v_cmp_gt_f32_e32 vcc_lo, 0x800000, v0 +; GFX11-NEXT: v_cmp_le_f32_e32 vcc_lo, 0x800000, v0 ; GFX11-NEXT: v_cvt_f32_i32_e32 v1, v1 -; GFX11-NEXT: v_cndmask_b32_e64 v2, 0, 1, vcc_lo +; GFX11-NEXT: v_cndmask_b32_e64 v2, 1, 0, vcc_lo ; GFX11-NEXT: s_delay_alu instid0(VALU_DEP_1) | instskip(NEXT) | instid1(VALU_DEP_1) ; GFX11-NEXT: v_lshlrev_b32_e32 v2, 5, v2 ; GFX11-NEXT: v_ldexp_f32 v0, v0, v2 -; GFX11-NEXT: v_cndmask_b32_e64 v2, 0, 0x42000000, vcc_lo +; GFX11-NEXT: v_cndmask_b32_e64 v2, 0x42000000, 0, vcc_lo ; GFX11-NEXT: s_delay_alu instid0(VALU_DEP_2) | instskip(SKIP_2) | instid1(VALU_DEP_1) ; GFX11-NEXT: v_log_f32_e32 v0, v0 ; GFX11-NEXT: s_waitcnt_depctr 0xfff diff --git a/llvm/test/CodeGen/AMDGPU/GlobalISel/select-to-fmin-fmax.ll b/llvm/test/CodeGen/AMDGPU/GlobalISel/select-to-fmin-fmax.ll index ee3bf96111994..1991afa83870a 100644 --- a/llvm/test/CodeGen/AMDGPU/GlobalISel/select-to-fmin-fmax.ll +++ b/llvm/test/CodeGen/AMDGPU/GlobalISel/select-to-fmin-fmax.ll @@ -18,8 +18,8 @@ define float @test_s32(float %a) #0 { ; GCN-LABEL: test_s32: ; GCN: ; %bb.0: ; %entry ; GCN-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GCN-NEXT: v_cmp_gt_f32_e32 vcc, 0, v0 -; GCN-NEXT: v_cndmask_b32_e64 v0, v0, 0, vcc +; GCN-NEXT: v_cmp_le_f32_e32 vcc, 0, v0 +; GCN-NEXT: v_cndmask_b32_e32 v0, 0, v0, vcc ; GCN-NEXT: s_setpc_b64 s[30:31] entry: %fcmp = fcmp olt float %a, 0.0 @@ -111,10 +111,10 @@ define <2 x float> @test_v2s32(<2 x float> %a) #0 { ; GCN-LABEL: test_v2s32: ; GCN: ; %bb.0: ; %entry ; GCN-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GCN-NEXT: v_cmp_gt_f32_e32 vcc, 0, v0 -; GCN-NEXT: v_cndmask_b32_e64 v0, v0, 0, vcc -; GCN-NEXT: v_cmp_gt_f32_e32 vcc, 0, v1 -; GCN-NEXT: v_cndmask_b32_e64 v1, v1, 0, vcc +; GCN-NEXT: v_cmp_le_f32_e32 vcc, 0, v0 +; GCN-NEXT: v_cndmask_b32_e32 v0, 0, v0, vcc +; GCN-NEXT: v_cmp_le_f32_e32 vcc, 0, v1 +; GCN-NEXT: v_cndmask_b32_e32 v1, 0, v1, vcc 
; GCN-NEXT: s_setpc_b64 s[30:31] entry: %fcmp = fcmp olt <2 x float> %a, zeroinitializer @@ -126,14 +126,14 @@ define <4 x float> @test_v4s32(<4 x float> %a) #0 { ; GCN-LABEL: test_v4s32: ; GCN: ; %bb.0: ; %entry ; GCN-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GCN-NEXT: v_cmp_gt_f32_e32 vcc, 0, v0 -; GCN-NEXT: v_cndmask_b32_e64 v0, v0, 0, vcc -; GCN-NEXT: v_cmp_gt_f32_e32 vcc, 0, v1 -; GCN-NEXT: v_cndmask_b32_e64 v1, v1, 0, vcc -; GCN-NEXT: v_cmp_gt_f32_e32 vcc, 0, v2 -; GCN-NEXT: v_cndmask_b32_e64 v2, v2, 0, vcc -; GCN-NEXT: v_cmp_gt_f32_e32 vcc, 0, v3 -; GCN-NEXT: v_cndmask_b32_e64 v3, v3, 0, vcc +; GCN-NEXT: v_cmp_le_f32_e32 vcc, 0, v0 +; GCN-NEXT: v_cndmask_b32_e32 v0, 0, v0, vcc +; GCN-NEXT: v_cmp_le_f32_e32 vcc, 0, v1 +; GCN-NEXT: v_cndmask_b32_e32 v1, 0, v1, vcc +; GCN-NEXT: v_cmp_le_f32_e32 vcc, 0, v2 +; GCN-NEXT: v_cndmask_b32_e32 v2, 0, v2, vcc +; GCN-NEXT: v_cmp_le_f32_e32 vcc, 0, v3 +; GCN-NEXT: v_cndmask_b32_e32 v3, 0, v3, vcc ; GCN-NEXT: s_setpc_b64 s[30:31] entry: %fcmp = fcmp olt <4 x float> %a, zeroinitializer diff --git a/llvm/test/CodeGen/AMDGPU/amdgpu-simplify-libcall-pow-codegen.ll b/llvm/test/CodeGen/AMDGPU/amdgpu-simplify-libcall-pow-codegen.ll index 5bda853b76727..b494ff8ba1f5d 100644 --- a/llvm/test/CodeGen/AMDGPU/amdgpu-simplify-libcall-pow-codegen.ll +++ b/llvm/test/CodeGen/AMDGPU/amdgpu-simplify-libcall-pow-codegen.ll @@ -1,773 +1,2719 @@ -; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py UTC_ARGS: --version 2 -; RUN: opt -mtriple=amdgcn-amd-amdhsa -mcpu=gfx900 -passes=amdgpu-simplifylib,instcombine -amdgpu-prelink < %s | llc -mtriple=amdgcn-amd-amdhsa -mcpu=gfx900 -amdgpu-prelink | FileCheck %s - -declare hidden float @_Z3powff(float, float) -declare hidden double @_Z3powdd(double, double) -declare hidden half @_Z3powDhDh(half, half) - -declare hidden float @_Z4powrff(float, float) -declare hidden double @_Z4powrdd(double, double) -declare hidden half @_Z4powrDhDh(half, half) - -declare hidden float @_Z4pownfi(float, i32) -declare hidden double @_Z4powndi(double, i32) -declare hidden half @_Z4pownDhi(half, i32) - -; -------------------------------------------------------------------- -; test pow -; -------------------------------------------------------------------- - -define half @test_pow_fast_f16(half %x, half %y) { -; CHECK-LABEL: test_pow_fast_f16: -; CHECK: ; %bb.0: -; CHECK-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; CHECK-NEXT: s_getpc_b64 s[16:17] -; CHECK-NEXT: s_add_u32 s16, s16, _Z3powDhDh@rel32@lo+4 -; CHECK-NEXT: s_addc_u32 s17, s17, _Z3powDhDh@rel32@hi+12 -; CHECK-NEXT: s_setpc_b64 s[16:17] - %pow = tail call fast half @_Z3powDhDh(half %x, half %y) - ret half %pow -} +; NOTE: Assertions have been autogenerated by utils/update_test_checks.py UTC_ARGS: --version 2 +; RUN: opt -S -mtriple=amdgcn-amd-amdhsa -passes=amdgpu-simplifylib,instcombine -amdgpu-prelink %s | FileCheck %s + +target datalayout = "e-p:64:64-p1:64:64-p2:32:32-p3:32:32-p4:64:64-p5:32:32-p6:32:32-p7:160:256:256:32-p8:128:128-i64:64-v16:16-v24:32-v32:32-v48:64-v96:128-v192:256-v256:256-v512:512-v1024:1024-v2048:2048-n32:64-S32-A5-G1-ni:7:8:9" + +declare float @_Z3powff(float, float) +declare <2 x float> @_Z3powDv2_fS_(<2 x float>, <2 x float>) +declare <3 x float> @_Z3powDv3_fS_(<3 x float>, <3 x float>) +declare <4 x float> @_Z3powDv4_fS_(<4 x float>, <4 x float>) +declare <8 x float> @_Z3powDv8_fS_(<8 x float>, <8 x float>) +declare <16 x float> @_Z3powDv16_fS_(<16 x float>, <16 x float>) +declare double @_Z3powdd(double, double) +declare <2 
x double> @_Z3powDv2_dS_(<2 x double>, <2 x double>) +declare <3 x double> @_Z3powDv3_dS_(<3 x double>, <3 x double>) +declare <4 x double> @_Z3powDv4_dS_(<4 x double>, <4 x double>) +declare <8 x double> @_Z3powDv8_dS_(<8 x double>, <8 x double>) +declare <16 x double> @_Z3powDv16_dS_(<16 x double>, <16 x double>) +declare half @_Z3powDhDh(half, half) +declare <2 x half> @_Z3powDv2_DhS_(<2 x half>, <2 x half>) +declare <3 x half> @_Z3powDv3_DhS_(<3 x half>, <3 x half>) +declare <4 x half> @_Z3powDv4_DhS_(<4 x half>, <4 x half>) +declare <8 x half> @_Z3powDv8_DhS_(<8 x half>, <8 x half>) +declare <16 x half> @_Z3powDv16_DhS_(<16 x half>, <16 x half>) +declare void @llvm.assume(i1 noundef) +declare float @llvm.floor.f32(float) +declare float @llvm.ceil.f32(float) +declare float @llvm.trunc.f32(float) +declare float @llvm.rint.f32(float) +declare float @llvm.nearbyint.f32(float) +declare float @llvm.round.f32(float) +declare float @llvm.roundeven.f32(float) define float @test_pow_fast_f32(float %x, float %y) { -; CHECK-LABEL: test_pow_fast_f32: -; CHECK: ; %bb.0: -; CHECK-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; CHECK-NEXT: s_getpc_b64 s[16:17] -; CHECK-NEXT: s_add_u32 s16, s16, _Z3powff@rel32@lo+4 -; CHECK-NEXT: s_addc_u32 s17, s17, _Z3powff@rel32@hi+12 -; CHECK-NEXT: s_setpc_b64 s[16:17] +; CHECK-LABEL: define float @test_pow_fast_f32 +; CHECK-SAME: (float [[X:%.*]], float [[Y:%.*]]) { +; CHECK-NEXT: [[POW:%.*]] = tail call fast float @_Z3powff(float [[X]], float [[Y]]) +; CHECK-NEXT: ret float [[POW]] +; %pow = tail call fast float @_Z3powff(float %x, float %y) ret float %pow } -define double @test_pow_fast_f64(double %x, double %y) { -; CHECK-LABEL: test_pow_fast_f64: -; CHECK: ; %bb.0: -; CHECK-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; CHECK-NEXT: s_getpc_b64 s[16:17] -; CHECK-NEXT: s_add_u32 s16, s16, _Z3powdd@rel32@lo+4 -; CHECK-NEXT: s_addc_u32 s17, s17, _Z3powdd@rel32@hi+12 -; CHECK-NEXT: s_setpc_b64 s[16:17] - %pow = tail call fast double @_Z3powdd(double %x, double %y) +define <2 x float> @test_pow_fast_v2f32(<2 x float> %x, <2 x float> %y) { +; CHECK-LABEL: define <2 x float> @test_pow_fast_v2f32 +; CHECK-SAME: (<2 x float> [[X:%.*]], <2 x float> [[Y:%.*]]) { +; CHECK-NEXT: [[POW:%.*]] = tail call fast <2 x float> @_Z3powDv2_fS_(<2 x float> [[X]], <2 x float> [[Y]]) +; CHECK-NEXT: ret <2 x float> [[POW]] +; + %pow = tail call fast <2 x float> @_Z3powDv2_fS_(<2 x float> %x, <2 x float> %y) + ret <2 x float> %pow +} + +define float @test_pow_afn_f32_nnan(float %x, float %y) { +; CHECK-LABEL: define float @test_pow_afn_f32_nnan +; CHECK-SAME: (float [[X:%.*]], float [[Y:%.*]]) { +; CHECK-NEXT: [[POW:%.*]] = tail call nnan afn float @_Z3powff(float [[X]], float [[Y]]) +; CHECK-NEXT: ret float [[POW]] +; + %pow = tail call afn nnan float @_Z3powff(float %x, float %y) + ret float %pow +} + +define float @test_pow_afn_f32_nnan_ninf(float %x, float %y) { +; CHECK-LABEL: define float @test_pow_afn_f32_nnan_ninf +; CHECK-SAME: (float [[X:%.*]], float [[Y:%.*]]) { +; CHECK-NEXT: [[POW:%.*]] = tail call nnan ninf afn float @_Z3powff(float [[X]], float [[Y]]) +; CHECK-NEXT: ret float [[POW]] +; + %pow = tail call afn nnan ninf float @_Z3powff(float %x, float %y) + ret float %pow +} + +define <2 x float> @test_pow_afn_v2f32_nnan(<2 x float> %x, <2 x float> %y) { +; CHECK-LABEL: define <2 x float> @test_pow_afn_v2f32_nnan +; CHECK-SAME: (<2 x float> [[X:%.*]], <2 x float> [[Y:%.*]]) { +; CHECK-NEXT: [[POW:%.*]] = tail call nnan afn <2 x float> @_Z3powDv2_fS_(<2 x float> [[X]], <2 
x float> [[Y]]) +; CHECK-NEXT: ret <2 x float> [[POW]] +; + %pow = tail call afn nnan <2 x float> @_Z3powDv2_fS_(<2 x float> %x, <2 x float> %y) + ret <2 x float> %pow +} + +define <2 x float> @test_pow_afn_v2f32_nnan_ninf(<2 x float> %x, <2 x float> %y) { +; CHECK-LABEL: define <2 x float> @test_pow_afn_v2f32_nnan_ninf +; CHECK-SAME: (<2 x float> [[X:%.*]], <2 x float> [[Y:%.*]]) { +; CHECK-NEXT: [[POW:%.*]] = tail call nnan ninf afn <2 x float> @_Z3powDv2_fS_(<2 x float> [[X]], <2 x float> [[Y]]) +; CHECK-NEXT: ret <2 x float> [[POW]] +; + %pow = tail call afn nnan ninf <2 x float> @_Z3powDv2_fS_(<2 x float> %x, <2 x float> %y) + ret <2 x float> %pow +} + +define float @test_pow_afn_f32(float %x, float %y) { +; CHECK-LABEL: define float @test_pow_afn_f32 +; CHECK-SAME: (float [[X:%.*]], float [[Y:%.*]]) { +; CHECK-NEXT: [[POW:%.*]] = tail call afn float @_Z3powff(float [[X]], float [[Y]]) +; CHECK-NEXT: ret float [[POW]] +; + %pow = tail call afn float @_Z3powff(float %x, float %y) + ret float %pow +} + +define <2 x float> @test_pow_afn_v2f32(<2 x float> %x, <2 x float> %y) { +; CHECK-LABEL: define <2 x float> @test_pow_afn_v2f32 +; CHECK-SAME: (<2 x float> [[X:%.*]], <2 x float> [[Y:%.*]]) { +; CHECK-NEXT: [[POW:%.*]] = tail call afn <2 x float> @_Z3powDv2_fS_(<2 x float> [[X]], <2 x float> [[Y]]) +; CHECK-NEXT: ret <2 x float> [[POW]] +; + %pow = tail call afn <2 x float> @_Z3powDv2_fS_(<2 x float> %x, <2 x float> %y) + ret <2 x float> %pow +} + +define <3 x float> @test_pow_afn_v3f32(<3 x float> %x, <3 x float> %y) { +; CHECK-LABEL: define <3 x float> @test_pow_afn_v3f32 +; CHECK-SAME: (<3 x float> [[X:%.*]], <3 x float> [[Y:%.*]]) { +; CHECK-NEXT: [[POW:%.*]] = tail call afn <3 x float> @_Z3powDv3_fS_(<3 x float> [[X]], <3 x float> [[Y]]) +; CHECK-NEXT: ret <3 x float> [[POW]] +; + %pow = tail call afn <3 x float> @_Z3powDv3_fS_(<3 x float> %x, <3 x float> %y) + ret <3 x float> %pow +} + +define <4 x float> @test_pow_afn_v4f32(<4 x float> %x, <4 x float> %y) { +; CHECK-LABEL: define <4 x float> @test_pow_afn_v4f32 +; CHECK-SAME: (<4 x float> [[X:%.*]], <4 x float> [[Y:%.*]]) { +; CHECK-NEXT: [[POW:%.*]] = tail call afn <4 x float> @_Z3powDv4_fS_(<4 x float> [[X]], <4 x float> [[Y]]) +; CHECK-NEXT: ret <4 x float> [[POW]] +; + %pow = tail call afn <4 x float> @_Z3powDv4_fS_(<4 x float> %x, <4 x float> %y) + ret <4 x float> %pow +} + +define <8 x float> @test_pow_afn_v8f32(<8 x float> %x, <8 x float> %y) { +; CHECK-LABEL: define <8 x float> @test_pow_afn_v8f32 +; CHECK-SAME: (<8 x float> [[X:%.*]], <8 x float> [[Y:%.*]]) { +; CHECK-NEXT: [[POW:%.*]] = tail call afn <8 x float> @_Z3powDv8_fS_(<8 x float> [[X]], <8 x float> [[Y]]) +; CHECK-NEXT: ret <8 x float> [[POW]] +; + %pow = tail call afn <8 x float> @_Z3powDv8_fS_(<8 x float> %x, <8 x float> %y) + ret <8 x float> %pow +} + +define <16 x float> @test_pow_afn_v16f32(<16 x float> %x, <16 x float> %y) { +; CHECK-LABEL: define <16 x float> @test_pow_afn_v16f32 +; CHECK-SAME: (<16 x float> [[X:%.*]], <16 x float> [[Y:%.*]]) { +; CHECK-NEXT: [[POW:%.*]] = tail call afn <16 x float> @_Z3powDv16_fS_(<16 x float> [[X]], <16 x float> [[Y]]) +; CHECK-NEXT: ret <16 x float> [[POW]] +; + %pow = tail call afn <16 x float> @_Z3powDv16_fS_(<16 x float> %x, <16 x float> %y) + ret <16 x float> %pow +} + +define double @test_pow_afn_f64(double %x, double %y) { +; CHECK-LABEL: define double @test_pow_afn_f64 +; CHECK-SAME: (double [[X:%.*]], double [[Y:%.*]]) { +; CHECK-NEXT: [[POW:%.*]] = tail call afn double @_Z3powdd(double [[X]], double [[Y]]) +; 
CHECK-NEXT: ret double [[POW]] +; + %pow = tail call afn double @_Z3powdd(double %x, double %y) ret double %pow } -define half @test_pow_fast_f16__integral_y(half %x, i32 %y.i) { -; CHECK-LABEL: test_pow_fast_f16__integral_y: -; CHECK: ; %bb.0: -; CHECK-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; CHECK-NEXT: v_cvt_f32_i32_e32 v1, v1 -; CHECK-NEXT: v_log_f16_e64 v3, |v0| -; CHECK-NEXT: v_cvt_f16_f32_e32 v1, v1 -; CHECK-NEXT: v_cvt_f32_f16_e32 v1, v1 -; CHECK-NEXT: v_cvt_i32_f32_e32 v1, v1 -; CHECK-NEXT: v_cvt_f32_i32_e32 v2, v1 -; CHECK-NEXT: v_lshlrev_b16_e32 v1, 15, v1 -; CHECK-NEXT: v_and_b32_e32 v0, v1, v0 -; CHECK-NEXT: v_cvt_f16_f32_e32 v2, v2 -; CHECK-NEXT: v_mul_f16_e32 v2, v3, v2 -; CHECK-NEXT: v_exp_f16_e32 v2, v2 -; CHECK-NEXT: v_or_b32_e32 v0, v0, v2 -; CHECK-NEXT: s_setpc_b64 s[30:31] - %y = sitofp i32 %y.i to half - %pow = tail call fast half @_Z3powDhDh(half %x, half %y) +define <2 x double> @test_pow_afn_v2f64(<2 x double> %x, <2 x double> %y) { +; CHECK-LABEL: define <2 x double> @test_pow_afn_v2f64 +; CHECK-SAME: (<2 x double> [[X:%.*]], <2 x double> [[Y:%.*]]) { +; CHECK-NEXT: [[POW:%.*]] = tail call afn <2 x double> @_Z3powDv2_dS_(<2 x double> [[X]], <2 x double> [[Y]]) +; CHECK-NEXT: ret <2 x double> [[POW]] +; + %pow = tail call afn <2 x double> @_Z3powDv2_dS_(<2 x double> %x, <2 x double> %y) + ret <2 x double> %pow +} + +define <3 x double> @test_pow_afn_v3f64(<3 x double> %x, <3 x double> %y) { +; CHECK-LABEL: define <3 x double> @test_pow_afn_v3f64 +; CHECK-SAME: (<3 x double> [[X:%.*]], <3 x double> [[Y:%.*]]) { +; CHECK-NEXT: [[POW:%.*]] = tail call afn <3 x double> @_Z3powDv3_dS_(<3 x double> [[X]], <3 x double> [[Y]]) +; CHECK-NEXT: ret <3 x double> [[POW]] +; + %pow = tail call afn <3 x double> @_Z3powDv3_dS_(<3 x double> %x, <3 x double> %y) + ret <3 x double> %pow +} + +define <4 x double> @test_pow_afn_v4f64(<4 x double> %x, <4 x double> %y) { +; CHECK-LABEL: define <4 x double> @test_pow_afn_v4f64 +; CHECK-SAME: (<4 x double> [[X:%.*]], <4 x double> [[Y:%.*]]) { +; CHECK-NEXT: [[POW:%.*]] = tail call afn <4 x double> @_Z3powDv4_dS_(<4 x double> [[X]], <4 x double> [[Y]]) +; CHECK-NEXT: ret <4 x double> [[POW]] +; + %pow = tail call afn <4 x double> @_Z3powDv4_dS_(<4 x double> %x, <4 x double> %y) + ret <4 x double> %pow +} + +define <8 x double> @test_pow_afn_v8f64(<8 x double> %x, <8 x double> %y) { +; CHECK-LABEL: define <8 x double> @test_pow_afn_v8f64 +; CHECK-SAME: (<8 x double> [[X:%.*]], <8 x double> [[Y:%.*]]) { +; CHECK-NEXT: [[POW:%.*]] = tail call afn <8 x double> @_Z3powDv8_dS_(<8 x double> [[X]], <8 x double> [[Y]]) +; CHECK-NEXT: ret <8 x double> [[POW]] +; + %pow = tail call afn <8 x double> @_Z3powDv8_dS_(<8 x double> %x, <8 x double> %y) + ret <8 x double> %pow +} + +define <16 x double> @test_pow_afn_v16f64(<16 x double> %x, <16 x double> %y) { +; CHECK-LABEL: define <16 x double> @test_pow_afn_v16f64 +; CHECK-SAME: (<16 x double> [[X:%.*]], <16 x double> [[Y:%.*]]) { +; CHECK-NEXT: [[POW:%.*]] = tail call afn <16 x double> @_Z3powDv16_dS_(<16 x double> [[X]], <16 x double> [[Y]]) +; CHECK-NEXT: ret <16 x double> [[POW]] +; + %pow = tail call afn <16 x double> @_Z3powDv16_dS_(<16 x double> %x, <16 x double> %y) + ret <16 x double> %pow +} + +define half @test_pow_afn_f16(half %x, half %y) { +; CHECK-LABEL: define half @test_pow_afn_f16 +; CHECK-SAME: (half [[X:%.*]], half [[Y:%.*]]) { +; CHECK-NEXT: [[POW:%.*]] = tail call afn half @_Z3powDhDh(half [[X]], half [[Y]]) +; CHECK-NEXT: ret half [[POW]] +; + %pow = tail call afn half 
@_Z3powDhDh(half %x, half %y) ret half %pow } -define float @test_pow_fast_f32__integral_y(float %x, i32 %y.i) { -; CHECK-LABEL: test_pow_fast_f32__integral_y: -; CHECK: ; %bb.0: -; CHECK-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; CHECK-NEXT: v_cvt_f32_i32_e32 v1, v1 -; CHECK-NEXT: s_mov_b32 s4, 0x800000 -; CHECK-NEXT: v_cmp_lt_f32_e64 vcc, |v0|, s4 -; CHECK-NEXT: v_cndmask_b32_e64 v3, 0, 32, vcc -; CHECK-NEXT: v_cvt_i32_f32_e32 v1, v1 -; CHECK-NEXT: v_ldexp_f32 v3, |v0|, v3 -; CHECK-NEXT: v_log_f32_e32 v3, v3 -; CHECK-NEXT: v_mov_b32_e32 v2, 0x42000000 -; CHECK-NEXT: v_cvt_f32_i32_e32 v4, v1 -; CHECK-NEXT: v_cndmask_b32_e32 v2, 0, v2, vcc -; CHECK-NEXT: v_sub_f32_e32 v2, v3, v2 -; CHECK-NEXT: s_mov_b32 s4, 0xc2fc0000 -; CHECK-NEXT: v_mul_f32_e32 v3, v2, v4 -; CHECK-NEXT: v_mov_b32_e32 v5, 0x42800000 -; CHECK-NEXT: v_cmp_gt_f32_e32 vcc, s4, v3 -; CHECK-NEXT: v_cndmask_b32_e32 v3, 0, v5, vcc -; CHECK-NEXT: v_fma_f32 v2, v2, v4, v3 -; CHECK-NEXT: v_exp_f32_e32 v2, v2 -; CHECK-NEXT: v_not_b32_e32 v3, 63 -; CHECK-NEXT: v_cndmask_b32_e32 v3, 0, v3, vcc -; CHECK-NEXT: v_lshlrev_b32_e32 v1, 31, v1 -; CHECK-NEXT: v_ldexp_f32 v2, v2, v3 -; CHECK-NEXT: v_and_or_b32 v0, v1, v0, v2 -; CHECK-NEXT: s_setpc_b64 s[30:31] - %y = sitofp i32 %y.i to float - %pow = tail call fast float @_Z3powff(float %x, float %y) +define <2 x half> @test_pow_afn_v2f16(<2 x half> %x, <2 x half> %y) { +; CHECK-LABEL: define <2 x half> @test_pow_afn_v2f16 +; CHECK-SAME: (<2 x half> [[X:%.*]], <2 x half> [[Y:%.*]]) { +; CHECK-NEXT: [[POW:%.*]] = tail call afn <2 x half> @_Z3powDv2_DhS_(<2 x half> [[X]], <2 x half> [[Y]]) +; CHECK-NEXT: ret <2 x half> [[POW]] +; + %pow = tail call afn <2 x half> @_Z3powDv2_DhS_(<2 x half> %x, <2 x half> %y) + ret <2 x half> %pow +} + +define <3 x half> @test_pow_afn_v3f16(<3 x half> %x, <3 x half> %y) { +; CHECK-LABEL: define <3 x half> @test_pow_afn_v3f16 +; CHECK-SAME: (<3 x half> [[X:%.*]], <3 x half> [[Y:%.*]]) { +; CHECK-NEXT: [[POW:%.*]] = tail call afn <3 x half> @_Z3powDv3_DhS_(<3 x half> [[X]], <3 x half> [[Y]]) +; CHECK-NEXT: ret <3 x half> [[POW]] +; + %pow = tail call afn <3 x half> @_Z3powDv3_DhS_(<3 x half> %x, <3 x half> %y) + ret <3 x half> %pow +} + +define <4 x half> @test_pow_afn_v4f16(<4 x half> %x, <4 x half> %y) { +; CHECK-LABEL: define <4 x half> @test_pow_afn_v4f16 +; CHECK-SAME: (<4 x half> [[X:%.*]], <4 x half> [[Y:%.*]]) { +; CHECK-NEXT: [[POW:%.*]] = tail call afn <4 x half> @_Z3powDv4_DhS_(<4 x half> [[X]], <4 x half> [[Y]]) +; CHECK-NEXT: ret <4 x half> [[POW]] +; + %pow = tail call afn <4 x half> @_Z3powDv4_DhS_(<4 x half> %x, <4 x half> %y) + ret <4 x half> %pow +} + +define <8 x half> @test_pow_afn_v8f16(<8 x half> %x, <8 x half> %y) { +; CHECK-LABEL: define <8 x half> @test_pow_afn_v8f16 +; CHECK-SAME: (<8 x half> [[X:%.*]], <8 x half> [[Y:%.*]]) { +; CHECK-NEXT: [[POW:%.*]] = tail call afn <8 x half> @_Z3powDv8_DhS_(<8 x half> [[X]], <8 x half> [[Y]]) +; CHECK-NEXT: ret <8 x half> [[POW]] +; + %pow = tail call afn <8 x half> @_Z3powDv8_DhS_(<8 x half> %x, <8 x half> %y) + ret <8 x half> %pow +} + +define <16 x half> @test_pow_afn_v16f16(<16 x half> %x, <16 x half> %y) { +; CHECK-LABEL: define <16 x half> @test_pow_afn_v16f16 +; CHECK-SAME: (<16 x half> [[X:%.*]], <16 x half> [[Y:%.*]]) { +; CHECK-NEXT: [[POW:%.*]] = tail call afn <16 x half> @_Z3powDv16_DhS_(<16 x half> [[X]], <16 x half> [[Y]]) +; CHECK-NEXT: ret <16 x half> [[POW]] +; + %pow = tail call afn <16 x half> @_Z3powDv16_DhS_(<16 x half> %x, <16 x half> %y) + ret <16 x half> %pow +} + +define 
float @test_pow_f32(float %x, float %y) { +; CHECK-LABEL: define float @test_pow_f32 +; CHECK-SAME: (float [[X:%.*]], float [[Y:%.*]]) { +; CHECK-NEXT: [[POW:%.*]] = tail call float @_Z3powff(float [[X]], float [[Y]]) +; CHECK-NEXT: ret float [[POW]] +; + %pow = tail call float @_Z3powff(float %x, float %y) + ret float %pow +} + +define float @test_pow_f32_nnan(float %x, float %y) { +; CHECK-LABEL: define float @test_pow_f32_nnan +; CHECK-SAME: (float [[X:%.*]], float [[Y:%.*]]) { +; CHECK-NEXT: [[POW:%.*]] = tail call nnan float @_Z3powff(float [[X]], float [[Y]]) +; CHECK-NEXT: ret float [[POW]] +; + %pow = tail call nnan float @_Z3powff(float %x, float %y) + ret float %pow +} + +define <2 x float> @test_pow_v2f32(<2 x float> %x, <2 x float> %y) { +; CHECK-LABEL: define <2 x float> @test_pow_v2f32 +; CHECK-SAME: (<2 x float> [[X:%.*]], <2 x float> [[Y:%.*]]) { +; CHECK-NEXT: [[POW:%.*]] = tail call <2 x float> @_Z3powDv2_fS_(<2 x float> [[X]], <2 x float> [[Y]]) +; CHECK-NEXT: ret <2 x float> [[POW]] +; + %pow = tail call <2 x float> @_Z3powDv2_fS_(<2 x float> %x, <2 x float> %y) + ret <2 x float> %pow +} + +define <3 x float> @test_pow_v3f32(<3 x float> %x, <3 x float> %y) { +; CHECK-LABEL: define <3 x float> @test_pow_v3f32 +; CHECK-SAME: (<3 x float> [[X:%.*]], <3 x float> [[Y:%.*]]) { +; CHECK-NEXT: [[POW:%.*]] = tail call <3 x float> @_Z3powDv3_fS_(<3 x float> [[X]], <3 x float> [[Y]]) +; CHECK-NEXT: ret <3 x float> [[POW]] +; + %pow = tail call <3 x float> @_Z3powDv3_fS_(<3 x float> %x, <3 x float> %y) + ret <3 x float> %pow +} + +define <4 x float> @test_pow_v4f32(<4 x float> %x, <4 x float> %y) { +; CHECK-LABEL: define <4 x float> @test_pow_v4f32 +; CHECK-SAME: (<4 x float> [[X:%.*]], <4 x float> [[Y:%.*]]) { +; CHECK-NEXT: [[POW:%.*]] = tail call <4 x float> @_Z3powDv4_fS_(<4 x float> [[X]], <4 x float> [[Y]]) +; CHECK-NEXT: ret <4 x float> [[POW]] +; + %pow = tail call <4 x float> @_Z3powDv4_fS_(<4 x float> %x, <4 x float> %y) + ret <4 x float> %pow +} + +define <8 x float> @test_pow_v8f32(<8 x float> %x, <8 x float> %y) { +; CHECK-LABEL: define <8 x float> @test_pow_v8f32 +; CHECK-SAME: (<8 x float> [[X:%.*]], <8 x float> [[Y:%.*]]) { +; CHECK-NEXT: [[POW:%.*]] = tail call <8 x float> @_Z3powDv8_fS_(<8 x float> [[X]], <8 x float> [[Y]]) +; CHECK-NEXT: ret <8 x float> [[POW]] +; + %pow = tail call <8 x float> @_Z3powDv8_fS_(<8 x float> %x, <8 x float> %y) + ret <8 x float> %pow +} + +define <16 x float> @test_pow_v16f32(<16 x float> %x, <16 x float> %y) { +; CHECK-LABEL: define <16 x float> @test_pow_v16f32 +; CHECK-SAME: (<16 x float> [[X:%.*]], <16 x float> [[Y:%.*]]) { +; CHECK-NEXT: [[POW:%.*]] = tail call <16 x float> @_Z3powDv16_fS_(<16 x float> [[X]], <16 x float> [[Y]]) +; CHECK-NEXT: ret <16 x float> [[POW]] +; + %pow = tail call <16 x float> @_Z3powDv16_fS_(<16 x float> %x, <16 x float> %y) + ret <16 x float> %pow +} + +define double @test_pow_f64(double %x, double %y) { +; CHECK-LABEL: define double @test_pow_f64 +; CHECK-SAME: (double [[X:%.*]], double [[Y:%.*]]) { +; CHECK-NEXT: [[POW:%.*]] = tail call double @_Z3powdd(double [[X]], double [[Y]]) +; CHECK-NEXT: ret double [[POW]] +; + %pow = tail call double @_Z3powdd(double %x, double %y) + ret double %pow +} + +define <2 x double> @test_pow_v2f64(<2 x double> %x, <2 x double> %y) { +; CHECK-LABEL: define <2 x double> @test_pow_v2f64 +; CHECK-SAME: (<2 x double> [[X:%.*]], <2 x double> [[Y:%.*]]) { +; CHECK-NEXT: [[POW:%.*]] = tail call <2 x double> @_Z3powDv2_dS_(<2 x double> [[X]], <2 x double> [[Y]]) +; 
CHECK-NEXT: ret <2 x double> [[POW]] +; + %pow = tail call <2 x double> @_Z3powDv2_dS_(<2 x double> %x, <2 x double> %y) + ret <2 x double> %pow +} + +define <3 x double> @test_pow_v3f64(<3 x double> %x, <3 x double> %y) { +; CHECK-LABEL: define <3 x double> @test_pow_v3f64 +; CHECK-SAME: (<3 x double> [[X:%.*]], <3 x double> [[Y:%.*]]) { +; CHECK-NEXT: [[POW:%.*]] = tail call <3 x double> @_Z3powDv3_dS_(<3 x double> [[X]], <3 x double> [[Y]]) +; CHECK-NEXT: ret <3 x double> [[POW]] +; + %pow = tail call <3 x double> @_Z3powDv3_dS_(<3 x double> %x, <3 x double> %y) + ret <3 x double> %pow +} + +define <4 x double> @test_pow_v4f64(<4 x double> %x, <4 x double> %y) { +; CHECK-LABEL: define <4 x double> @test_pow_v4f64 +; CHECK-SAME: (<4 x double> [[X:%.*]], <4 x double> [[Y:%.*]]) { +; CHECK-NEXT: [[POW:%.*]] = tail call <4 x double> @_Z3powDv4_dS_(<4 x double> [[X]], <4 x double> [[Y]]) +; CHECK-NEXT: ret <4 x double> [[POW]] +; + %pow = tail call <4 x double> @_Z3powDv4_dS_(<4 x double> %x, <4 x double> %y) + ret <4 x double> %pow +} + +define <8 x double> @test_pow_v8f64(<8 x double> %x, <8 x double> %y) { +; CHECK-LABEL: define <8 x double> @test_pow_v8f64 +; CHECK-SAME: (<8 x double> [[X:%.*]], <8 x double> [[Y:%.*]]) { +; CHECK-NEXT: [[POW:%.*]] = tail call <8 x double> @_Z3powDv8_dS_(<8 x double> [[X]], <8 x double> [[Y]]) +; CHECK-NEXT: ret <8 x double> [[POW]] +; + %pow = tail call <8 x double> @_Z3powDv8_dS_(<8 x double> %x, <8 x double> %y) + ret <8 x double> %pow +} + +define <16 x double> @test_pow_v16f64(<16 x double> %x, <16 x double> %y) { +; CHECK-LABEL: define <16 x double> @test_pow_v16f64 +; CHECK-SAME: (<16 x double> [[X:%.*]], <16 x double> [[Y:%.*]]) { +; CHECK-NEXT: [[POW:%.*]] = tail call <16 x double> @_Z3powDv16_dS_(<16 x double> [[X]], <16 x double> [[Y]]) +; CHECK-NEXT: ret <16 x double> [[POW]] +; + %pow = tail call <16 x double> @_Z3powDv16_dS_(<16 x double> %x, <16 x double> %y) + ret <16 x double> %pow +} + +define half @test_pow_f16(half %x, half %y) { +; CHECK-LABEL: define half @test_pow_f16 +; CHECK-SAME: (half [[X:%.*]], half [[Y:%.*]]) { +; CHECK-NEXT: [[POW:%.*]] = tail call half @_Z3powDhDh(half [[X]], half [[Y]]) +; CHECK-NEXT: ret half [[POW]] +; + %pow = tail call half @_Z3powDhDh(half %x, half %y) + ret half %pow +} + +define <2 x half> @test_pow_v2f16(<2 x half> %x, <2 x half> %y) { +; CHECK-LABEL: define <2 x half> @test_pow_v2f16 +; CHECK-SAME: (<2 x half> [[X:%.*]], <2 x half> [[Y:%.*]]) { +; CHECK-NEXT: [[POW:%.*]] = tail call <2 x half> @_Z3powDv2_DhS_(<2 x half> [[X]], <2 x half> [[Y]]) +; CHECK-NEXT: ret <2 x half> [[POW]] +; + %pow = tail call <2 x half> @_Z3powDv2_DhS_(<2 x half> %x, <2 x half> %y) + ret <2 x half> %pow +} + +define <3 x half> @test_pow_v3f16(<3 x half> %x, <3 x half> %y) { +; CHECK-LABEL: define <3 x half> @test_pow_v3f16 +; CHECK-SAME: (<3 x half> [[X:%.*]], <3 x half> [[Y:%.*]]) { +; CHECK-NEXT: [[POW:%.*]] = tail call <3 x half> @_Z3powDv3_DhS_(<3 x half> [[X]], <3 x half> [[Y]]) +; CHECK-NEXT: ret <3 x half> [[POW]] +; + %pow = tail call <3 x half> @_Z3powDv3_DhS_(<3 x half> %x, <3 x half> %y) + ret <3 x half> %pow +} + +define <4 x half> @test_pow_v4f16(<4 x half> %x, <4 x half> %y) { +; CHECK-LABEL: define <4 x half> @test_pow_v4f16 +; CHECK-SAME: (<4 x half> [[X:%.*]], <4 x half> [[Y:%.*]]) { +; CHECK-NEXT: [[POW:%.*]] = tail call <4 x half> @_Z3powDv4_DhS_(<4 x half> [[X]], <4 x half> [[Y]]) +; CHECK-NEXT: ret <4 x half> [[POW]] +; + %pow = tail call <4 x half> @_Z3powDv4_DhS_(<4 x half> %x, <4 x half> %y) + ret 
<4 x half> %pow +} + +define <8 x half> @test_pow_v8f16(<8 x half> %x, <8 x half> %y) { +; CHECK-LABEL: define <8 x half> @test_pow_v8f16 +; CHECK-SAME: (<8 x half> [[X:%.*]], <8 x half> [[Y:%.*]]) { +; CHECK-NEXT: [[POW:%.*]] = tail call <8 x half> @_Z3powDv8_DhS_(<8 x half> [[X]], <8 x half> [[Y]]) +; CHECK-NEXT: ret <8 x half> [[POW]] +; + %pow = tail call <8 x half> @_Z3powDv8_DhS_(<8 x half> %x, <8 x half> %y) + ret <8 x half> %pow +} + +define <16 x half> @test_pow_v16f16(<16 x half> %x, <16 x half> %y) { +; CHECK-LABEL: define <16 x half> @test_pow_v16f16 +; CHECK-SAME: (<16 x half> [[X:%.*]], <16 x half> [[Y:%.*]]) { +; CHECK-NEXT: [[POW:%.*]] = tail call <16 x half> @_Z3powDv16_DhS_(<16 x half> [[X]], <16 x half> [[Y]]) +; CHECK-NEXT: ret <16 x half> [[POW]] +; + %pow = tail call <16 x half> @_Z3powDv16_DhS_(<16 x half> %x, <16 x half> %y) + ret <16 x half> %pow +} + +define float @test_pow_afn_f32_minsize(float %x, float %y) #0 { +; CHECK-LABEL: define float @test_pow_afn_f32_minsize +; CHECK-SAME: (float [[X:%.*]], float [[Y:%.*]]) #[[ATTR2:[0-9]+]] { +; CHECK-NEXT: [[POW:%.*]] = tail call afn float @_Z3powff(float [[X]], float [[Y]]) +; CHECK-NEXT: ret float [[POW]] +; + %pow = tail call afn float @_Z3powff(float %x, float %y) + ret float %pow +} + +define float @test_pow_afn_f32_nnan_minsize(float %x, float %y) #0 { +; CHECK-LABEL: define float @test_pow_afn_f32_nnan_minsize +; CHECK-SAME: (float [[X:%.*]], float [[Y:%.*]]) #[[ATTR2]] { +; CHECK-NEXT: [[POW:%.*]] = tail call nnan afn float @_Z3powff(float [[X]], float [[Y]]) +; CHECK-NEXT: ret float [[POW]] +; + %pow = tail call afn nnan float @_Z3powff(float %x, float %y) + ret float %pow +} + +define float @test_pow_afn_f32_noinline(float %x, float %y) { +; CHECK-LABEL: define float @test_pow_afn_f32_noinline +; CHECK-SAME: (float [[X:%.*]], float [[Y:%.*]]) { +; CHECK-NEXT: [[POW:%.*]] = tail call afn float @_Z3powff(float [[X]], float [[Y]]) #[[ATTR5:[0-9]+]] +; CHECK-NEXT: ret float [[POW]] +; + %pow = tail call afn float @_Z3powff(float %x, float %y) #1 + ret float %pow +} + +define float @test_pow_afn_f32_nnan_noinline(float %x, float %y) { +; CHECK-LABEL: define float @test_pow_afn_f32_nnan_noinline +; CHECK-SAME: (float [[X:%.*]], float [[Y:%.*]]) { +; CHECK-NEXT: [[POW:%.*]] = tail call nnan afn float @_Z3powff(float [[X]], float [[Y]]) #[[ATTR5]] +; CHECK-NEXT: ret float [[POW]] +; + %pow = tail call afn nnan float @_Z3powff(float %x, float %y) #1 + ret float %pow +} + +define float @test_pow_afn_f32_strictfp(float %x, float %y) #2 { +; CHECK-LABEL: define float @test_pow_afn_f32_strictfp +; CHECK-SAME: (float [[X:%.*]], float [[Y:%.*]]) #[[ATTR3:[0-9]+]] { +; CHECK-NEXT: [[POW:%.*]] = tail call nnan nsz afn float @_Z3powff(float [[X]], float [[Y]]) #[[ATTR3]] +; CHECK-NEXT: ret float [[POW]] +; + %pow = tail call afn nsz nnan float @_Z3powff(float %x, float %y) #2 + ret float %pow +} + +define float @test_pow_fast_f32_nobuiltin(float %x, float %y) { +; CHECK-LABEL: define float @test_pow_fast_f32_nobuiltin +; CHECK-SAME: (float [[X:%.*]], float [[Y:%.*]]) { +; CHECK-NEXT: [[POW:%.*]] = tail call fast float @_Z3powff(float [[X]], float [[Y]]) #[[ATTR6:[0-9]+]] +; CHECK-NEXT: ret float [[POW]] +; + %pow = tail call fast float @_Z3powff(float %x, float %y) #3 + ret float %pow +} + +define float @test_pow_afn_f32_0.0(float %x) { +; CHECK-LABEL: define float @test_pow_afn_f32_0.0 +; CHECK-SAME: (float [[X:%.*]]) { +; CHECK-NEXT: ret float 1.000000e+00 +; + %pow = tail call afn float @_Z3powff(float %x, float 0.0) + ret 
float %pow +} + +define float @test_pow_afn_f32_neg0.0(float %x) { +; CHECK-LABEL: define float @test_pow_afn_f32_neg0.0 +; CHECK-SAME: (float [[X:%.*]]) { +; CHECK-NEXT: ret float 1.000000e+00 +; + %pow = tail call afn float @_Z3powff(float %x, float -0.0) + ret float %pow +} + +define <2 x float> @test_pow_afn_v2f32_0.0(<2 x float> %x) { +; CHECK-LABEL: define <2 x float> @test_pow_afn_v2f32_0.0 +; CHECK-SAME: (<2 x float> [[X:%.*]]) { +; CHECK-NEXT: ret <2 x float> splat (float 1.000000e+00) +; + %pow = tail call afn <2 x float> @_Z3powDv2_fS_(<2 x float> %x, <2 x float> ) + ret <2 x float> %pow +} + +define <2 x float> @test_pow_afn_v2f32_neg0.0(<2 x float> %x) { +; CHECK-LABEL: define <2 x float> @test_pow_afn_v2f32_neg0.0 +; CHECK-SAME: (<2 x float> [[X:%.*]]) { +; CHECK-NEXT: ret <2 x float> splat (float 1.000000e+00) +; + %pow = tail call afn <2 x float> @_Z3powDv2_fS_(<2 x float> %x, <2 x float> ) + ret <2 x float> %pow +} + +define <2 x float> @test_pow_afn_v2f32_plus_minus_0.0(<2 x float> %x) { +; CHECK-LABEL: define <2 x float> @test_pow_afn_v2f32_plus_minus_0.0 +; CHECK-SAME: (<2 x float> [[X:%.*]]) { +; CHECK-NEXT: ret <2 x float> splat (float 1.000000e+00) +; + %pow = tail call afn <2 x float> @_Z3powDv2_fS_(<2 x float> %x, <2 x float> ) + ret <2 x float> %pow +} + +define <3 x float> @test_pow_afn_v3f32_0.0_splat_undef(<3 x float> %x, <3 x float> %y) { +; CHECK-LABEL: define <3 x float> @test_pow_afn_v3f32_0.0_splat_undef +; CHECK-SAME: (<3 x float> [[X:%.*]], <3 x float> [[Y:%.*]]) { +; CHECK-NEXT: ret <3 x float> splat (float 1.000000e+00) +; + %pow = tail call afn <3 x float> @_Z3powDv3_fS_(<3 x float> %x, <3 x float> ) + ret <3 x float> %pow +} + +define <3 x float> @test_pow_afn_v3f32_neg0.0_splat_undef(<3 x float> %x, <3 x float> %y) { +; CHECK-LABEL: define <3 x float> @test_pow_afn_v3f32_neg0.0_splat_undef +; CHECK-SAME: (<3 x float> [[X:%.*]], <3 x float> [[Y:%.*]]) { +; CHECK-NEXT: ret <3 x float> splat (float 1.000000e+00) +; + %pow = tail call afn <3 x float> @_Z3powDv3_fS_(<3 x float> %x, <3 x float> ) + ret <3 x float> %pow +} + +define float @test_pow_afn_f32_0.5(float %x) { +; CHECK-LABEL: define float @test_pow_afn_f32_0.5 +; CHECK-SAME: (float [[X:%.*]]) { +; CHECK-NEXT: [[__POW2SQRT:%.*]] = call afn float @_Z4sqrtf(float [[X]]) +; CHECK-NEXT: ret float [[__POW2SQRT]] +; + %pow = tail call afn float @_Z3powff(float %x, float 0.5) + ret float %pow +} + +define float @test_pow_afn_f32_neg0.5(float %x) { +; CHECK-LABEL: define float @test_pow_afn_f32_neg0.5 +; CHECK-SAME: (float [[X:%.*]]) { +; CHECK-NEXT: [[__POW2RSQRT:%.*]] = call afn float @_Z5rsqrtf(float [[X]]) +; CHECK-NEXT: ret float [[__POW2RSQRT]] +; + %pow = tail call afn float @_Z3powff(float %x, float -0.5) + ret float %pow +} + +define <2 x float> @test_pow_afn_v2f32_0.5(<2 x float> %x) { +; CHECK-LABEL: define <2 x float> @test_pow_afn_v2f32_0.5 +; CHECK-SAME: (<2 x float> [[X:%.*]]) { +; CHECK-NEXT: [[__POW2SQRT:%.*]] = call afn <2 x float> @_Z4sqrtDv2_f(<2 x float> [[X]]) +; CHECK-NEXT: ret <2 x float> [[__POW2SQRT]] +; + %pow = tail call afn <2 x float> @_Z3powDv2_fS_(<2 x float> %x, <2 x float> ) + ret <2 x float> %pow +} + +define <2 x float> @test_pow_afn_v2f32_neg0.5(<2 x float> %x) { +; CHECK-LABEL: define <2 x float> @test_pow_afn_v2f32_neg0.5 +; CHECK-SAME: (<2 x float> [[X:%.*]]) { +; CHECK-NEXT: [[__POW2RSQRT:%.*]] = call afn <2 x float> @_Z5rsqrtDv2_f(<2 x float> [[X]]) +; CHECK-NEXT: ret <2 x float> [[__POW2RSQRT]] +; + %pow = tail call afn <2 x float> @_Z3powDv2_fS_(<2 x float> 
%x, <2 x float> ) + ret <2 x float> %pow +} + +define <2 x float> @test_pow_afn_v2f32_plus_minus_0.5(<2 x float> %x) { +; CHECK-LABEL: define <2 x float> @test_pow_afn_v2f32_plus_minus_0.5 +; CHECK-SAME: (<2 x float> [[X:%.*]]) { +; CHECK-NEXT: [[POW:%.*]] = tail call afn <2 x float> @_Z3powDv2_fS_(<2 x float> [[X]], <2 x float> ) +; CHECK-NEXT: ret <2 x float> [[POW]] +; + %pow = tail call afn <2 x float> @_Z3powDv2_fS_(<2 x float> %x, <2 x float> ) + ret <2 x float> %pow +} + +define <3 x float> @test_pow_afn_v3f32_0.5_splat_undef(<3 x float> %x, <3 x float> %y) { +; CHECK-LABEL: define <3 x float> @test_pow_afn_v3f32_0.5_splat_undef +; CHECK-SAME: (<3 x float> [[X:%.*]], <3 x float> [[Y:%.*]]) { +; CHECK-NEXT: [[__POW2SQRT:%.*]] = call afn <3 x float> @_Z4sqrtDv3_f(<3 x float> [[X]]) +; CHECK-NEXT: ret <3 x float> [[__POW2SQRT]] +; + %pow = tail call afn <3 x float> @_Z3powDv3_fS_(<3 x float> %x, <3 x float> ) + ret <3 x float> %pow +} + +define <3 x float> @test_pow_afn_v3f32_neg0.5_splat_undef(<3 x float> %x, <3 x float> %y) { +; CHECK-LABEL: define <3 x float> @test_pow_afn_v3f32_neg0.5_splat_undef +; CHECK-SAME: (<3 x float> [[X:%.*]], <3 x float> [[Y:%.*]]) { +; CHECK-NEXT: [[__POW2RSQRT:%.*]] = call afn <3 x float> @_Z5rsqrtDv3_f(<3 x float> [[X]]) +; CHECK-NEXT: ret <3 x float> [[__POW2RSQRT]] +; + %pow = tail call afn <3 x float> @_Z3powDv3_fS_(<3 x float> %x, <3 x float> ) + ret <3 x float> %pow +} + +define float @test_pow_afn_f32_1.0(float %x) { +; CHECK-LABEL: define float @test_pow_afn_f32_1.0 +; CHECK-SAME: (float [[X:%.*]]) { +; CHECK-NEXT: ret float [[X]] +; + %pow = tail call afn float @_Z3powff(float %x, float 1.0) + ret float %pow +} + +define float @test_pow_afn_f32_neg1.0(float %x) { +; CHECK-LABEL: define float @test_pow_afn_f32_neg1.0 +; CHECK-SAME: (float [[X:%.*]]) { +; CHECK-NEXT: [[__POWRECIP:%.*]] = fdiv afn float 1.000000e+00, [[X]] +; CHECK-NEXT: ret float [[__POWRECIP]] +; + %pow = tail call afn float @_Z3powff(float %x, float -1.0) + ret float %pow +} + +define <2 x float> @test_pow_afn_v2f32_1.0(<2 x float> %x) { +; CHECK-LABEL: define <2 x float> @test_pow_afn_v2f32_1.0 +; CHECK-SAME: (<2 x float> [[X:%.*]]) { +; CHECK-NEXT: ret <2 x float> [[X]] +; + %pow = tail call afn <2 x float> @_Z3powDv2_fS_(<2 x float> %x, <2 x float> ) + ret <2 x float> %pow +} + +define <2 x float> @test_pow_afn_v2f32_neg1.0(<2 x float> %x) { +; CHECK-LABEL: define <2 x float> @test_pow_afn_v2f32_neg1.0 +; CHECK-SAME: (<2 x float> [[X:%.*]]) { +; CHECK-NEXT: [[__POWRECIP:%.*]] = fdiv afn <2 x float> splat (float 1.000000e+00), [[X]] +; CHECK-NEXT: ret <2 x float> [[__POWRECIP]] +; + %pow = tail call afn <2 x float> @_Z3powDv2_fS_(<2 x float> %x, <2 x float> ) + ret <2 x float> %pow +} + +define <2 x float> @test_pow_afn_v2f32_plus_minus_1.0(<2 x float> %x) { +; CHECK-LABEL: define <2 x float> @test_pow_afn_v2f32_plus_minus_1.0 +; CHECK-SAME: (<2 x float> [[X:%.*]]) { +; CHECK-NEXT: [[POW:%.*]] = tail call afn <2 x float> @_Z4pownDv2_fDv2_i(<2 x float> [[X]], <2 x i32> ) +; CHECK-NEXT: ret <2 x float> [[POW]] +; + %pow = tail call afn <2 x float> @_Z3powDv2_fS_(<2 x float> %x, <2 x float> ) + ret <2 x float> %pow +} + +define <3 x float> @test_pow_afn_v3f32_1.0_splat_undef(<3 x float> %x, <3 x float> %y) { +; CHECK-LABEL: define <3 x float> @test_pow_afn_v3f32_1.0_splat_undef +; CHECK-SAME: (<3 x float> [[X:%.*]], <3 x float> [[Y:%.*]]) { +; CHECK-NEXT: ret <3 x float> [[X]] +; + %pow = tail call afn <3 x float> @_Z3powDv3_fS_(<3 x float> %x, <3 x float> ) + ret <3 x float> 
%pow +} + +define <3 x float> @test_pow_afn_v3f32_neg1.0_splat_undef(<3 x float> %x, <3 x float> %y) { +; CHECK-LABEL: define <3 x float> @test_pow_afn_v3f32_neg1.0_splat_undef +; CHECK-SAME: (<3 x float> [[X:%.*]], <3 x float> [[Y:%.*]]) { +; CHECK-NEXT: [[__POWRECIP:%.*]] = fdiv afn <3 x float> splat (float 1.000000e+00), [[X]] +; CHECK-NEXT: ret <3 x float> [[__POWRECIP]] +; + %pow = tail call afn <3 x float> @_Z3powDv3_fS_(<3 x float> %x, <3 x float> ) + ret <3 x float> %pow +} + +define float @test_pow_afn_f32_2.0(float %x) { +; CHECK-LABEL: define float @test_pow_afn_f32_2.0 +; CHECK-SAME: (float [[X:%.*]]) { +; CHECK-NEXT: [[__POW2:%.*]] = fmul afn float [[X]], [[X]] +; CHECK-NEXT: ret float [[__POW2]] +; + %pow = tail call afn float @_Z3powff(float %x, float 2.0) + ret float %pow +} + +define float @test_pow_afn_f32_neg2.0(float %x) { +; CHECK-LABEL: define float @test_pow_afn_f32_neg2.0 +; CHECK-SAME: (float [[X:%.*]]) { +; CHECK-NEXT: [[POW:%.*]] = tail call afn float @_Z4pownfi(float [[X]], i32 -2) +; CHECK-NEXT: ret float [[POW]] +; + %pow = tail call afn float @_Z3powff(float %x, float -2.0) + ret float %pow +} + +define <2 x float> @test_pow_afn_v2f32_2.0(<2 x float> %x) { +; CHECK-LABEL: define <2 x float> @test_pow_afn_v2f32_2.0 +; CHECK-SAME: (<2 x float> [[X:%.*]]) { +; CHECK-NEXT: [[__POW2:%.*]] = fmul afn <2 x float> [[X]], [[X]] +; CHECK-NEXT: ret <2 x float> [[__POW2]] +; + %pow = tail call afn <2 x float> @_Z3powDv2_fS_(<2 x float> %x, <2 x float> ) + ret <2 x float> %pow +} + +define <2 x float> @test_pow_afn_v2f32_neg2.0(<2 x float> %x) { +; CHECK-LABEL: define <2 x float> @test_pow_afn_v2f32_neg2.0 +; CHECK-SAME: (<2 x float> [[X:%.*]]) { +; CHECK-NEXT: [[POW:%.*]] = tail call afn <2 x float> @_Z4pownDv2_fDv2_i(<2 x float> [[X]], <2 x i32> splat (i32 -2)) +; CHECK-NEXT: ret <2 x float> [[POW]] +; + %pow = tail call afn <2 x float> @_Z3powDv2_fS_(<2 x float> %x, <2 x float> ) + ret <2 x float> %pow +} + +define <2 x float> @test_pow_afn_v2f32_plus_minus_2.0(<2 x float> %x) { +; CHECK-LABEL: define <2 x float> @test_pow_afn_v2f32_plus_minus_2.0 +; CHECK-SAME: (<2 x float> [[X:%.*]]) { +; CHECK-NEXT: [[POW:%.*]] = tail call afn <2 x float> @_Z4pownDv2_fDv2_i(<2 x float> [[X]], <2 x i32> ) +; CHECK-NEXT: ret <2 x float> [[POW]] +; + %pow = tail call afn <2 x float> @_Z3powDv2_fS_(<2 x float> %x, <2 x float> ) + ret <2 x float> %pow +} + +define float @test_pow_afn_f32_3.0(float %x) { +; CHECK-LABEL: define float @test_pow_afn_f32_3.0 +; CHECK-SAME: (float [[X:%.*]]) { +; CHECK-NEXT: [[POW:%.*]] = tail call afn float @_Z4pownfi(float [[X]], i32 3) +; CHECK-NEXT: ret float [[POW]] +; + %pow = tail call afn float @_Z3powff(float %x, float 3.0) + ret float %pow +} + +define float @test_pow_afn_f32_neg3.0(float %x) { +; CHECK-LABEL: define float @test_pow_afn_f32_neg3.0 +; CHECK-SAME: (float [[X:%.*]]) { +; CHECK-NEXT: [[POW:%.*]] = tail call afn float @_Z4pownfi(float [[X]], i32 -3) +; CHECK-NEXT: ret float [[POW]] +; + %pow = tail call afn float @_Z3powff(float %x, float -3.0) + ret float %pow +} + +define <2 x float> @test_pow_afn_v2f32_3.0(<2 x float> %x) { +; CHECK-LABEL: define <2 x float> @test_pow_afn_v2f32_3.0 +; CHECK-SAME: (<2 x float> [[X:%.*]]) { +; CHECK-NEXT: [[POW:%.*]] = tail call afn <2 x float> @_Z4pownDv2_fDv2_i(<2 x float> [[X]], <2 x i32> splat (i32 3)) +; CHECK-NEXT: ret <2 x float> [[POW]] +; + %pow = tail call afn <2 x float> @_Z3powDv2_fS_(<2 x float> %x, <2 x float> ) + ret <2 x float> %pow +} + +define <2 x float> @test_pow_afn_v2f32_neg3.0(<2 x 
float> %x) { +; CHECK-LABEL: define <2 x float> @test_pow_afn_v2f32_neg3.0 +; CHECK-SAME: (<2 x float> [[X:%.*]]) { +; CHECK-NEXT: [[POW:%.*]] = tail call afn <2 x float> @_Z4pownDv2_fDv2_i(<2 x float> [[X]], <2 x i32> splat (i32 -3)) +; CHECK-NEXT: ret <2 x float> [[POW]] +; + %pow = tail call afn <2 x float> @_Z3powDv2_fS_(<2 x float> %x, <2 x float> ) + ret <2 x float> %pow +} + +define <2 x float> @test_pow_afn_v2f32_plus_minus_3.0(<2 x float> %x) { +; CHECK-LABEL: define <2 x float> @test_pow_afn_v2f32_plus_minus_3.0 +; CHECK-SAME: (<2 x float> [[X:%.*]]) { +; CHECK-NEXT: [[POW:%.*]] = tail call afn <2 x float> @_Z4pownDv2_fDv2_i(<2 x float> [[X]], <2 x i32> ) +; CHECK-NEXT: ret <2 x float> [[POW]] +; + %pow = tail call afn <2 x float> @_Z3powDv2_fS_(<2 x float> %x, <2 x float> ) + ret <2 x float> %pow +} + +define float @test_pow_afn_f32_3.99(float %x) { +; CHECK-LABEL: define float @test_pow_afn_f32_3.99 +; CHECK-SAME: (float [[X:%.*]]) { +; CHECK-NEXT: [[POW:%.*]] = tail call afn float @_Z3powff(float [[X]], float 0x400FEB8520000000) +; CHECK-NEXT: ret float [[POW]] +; + %pow = tail call afn float @_Z3powff(float %x, float 0x400FEB8520000000) + ret float %pow +} + +define float @test_pow_afn_f32_neg3.99(float %x) { +; CHECK-LABEL: define float @test_pow_afn_f32_neg3.99 +; CHECK-SAME: (float [[X:%.*]]) { +; CHECK-NEXT: [[POW:%.*]] = tail call afn float @_Z3powff(float [[X]], float 0xC00FEB8520000000) +; CHECK-NEXT: ret float [[POW]] +; + %pow = tail call afn float @_Z3powff(float %x, float 0xC00FEB8520000000) + ret float %pow +} + +define <2 x float> @test_pow_afn_v2f32_3.99(<2 x float> %x) { +; CHECK-LABEL: define <2 x float> @test_pow_afn_v2f32_3.99 +; CHECK-SAME: (<2 x float> [[X:%.*]]) { +; CHECK-NEXT: [[POW:%.*]] = tail call afn <2 x float> @_Z3powDv2_fS_(<2 x float> [[X]], <2 x float> splat (float 0x400FEB8520000000)) +; CHECK-NEXT: ret <2 x float> [[POW]] +; + %pow = tail call afn <2 x float> @_Z3powDv2_fS_(<2 x float> %x, <2 x float> ) + ret <2 x float> %pow +} + +define <2 x float> @test_pow_afn_v2f32_neg3.99(<2 x float> %x) { +; CHECK-LABEL: define <2 x float> @test_pow_afn_v2f32_neg3.99 +; CHECK-SAME: (<2 x float> [[X:%.*]]) { +; CHECK-NEXT: [[POW:%.*]] = tail call afn <2 x float> @_Z3powDv2_fS_(<2 x float> [[X]], <2 x float> splat (float 0xC00FEB8520000000)) +; CHECK-NEXT: ret <2 x float> [[POW]] +; + %pow = tail call afn <2 x float> @_Z3powDv2_fS_(<2 x float> %x, <2 x float> ) + ret <2 x float> %pow +} + +define <2 x float> @test_pow_afn_v2f32_plus_minus_3.99(<2 x float> %x) { +; CHECK-LABEL: define <2 x float> @test_pow_afn_v2f32_plus_minus_3.99 +; CHECK-SAME: (<2 x float> [[X:%.*]]) { +; CHECK-NEXT: [[POW:%.*]] = tail call afn <2 x float> @_Z3powDv2_fS_(<2 x float> [[X]], <2 x float> ) +; CHECK-NEXT: ret <2 x float> [[POW]] +; + %pow = tail call afn <2 x float> @_Z3powDv2_fS_(<2 x float> %x, <2 x float> ) + ret <2 x float> %pow +} + +define float @test_pow_afn_f32_8.0(float %x) { +; CHECK-LABEL: define float @test_pow_afn_f32_8.0 +; CHECK-SAME: (float [[X:%.*]]) { +; CHECK-NEXT: [[POW:%.*]] = tail call afn float @_Z4pownfi(float [[X]], i32 8) +; CHECK-NEXT: ret float [[POW]] +; + %pow = tail call afn float @_Z3powff(float %x, float 8.0) + ret float %pow +} + +define float @test_pow_afn_f32_neg8.0(float %x) { +; CHECK-LABEL: define float @test_pow_afn_f32_neg8.0 +; CHECK-SAME: (float [[X:%.*]]) { +; CHECK-NEXT: [[POW:%.*]] = tail call afn float @_Z4pownfi(float [[X]], i32 -8) +; CHECK-NEXT: ret float [[POW]] +; + %pow = tail call afn float @_Z3powff(float %x, float 
-8.0) + ret float %pow +} + +define <2 x float> @test_pow_afn_v2f32_8.0(<2 x float> %x) { +; CHECK-LABEL: define <2 x float> @test_pow_afn_v2f32_8.0 +; CHECK-SAME: (<2 x float> [[X:%.*]]) { +; CHECK-NEXT: [[POW:%.*]] = tail call afn <2 x float> @_Z4pownDv2_fDv2_i(<2 x float> [[X]], <2 x i32> splat (i32 8)) +; CHECK-NEXT: ret <2 x float> [[POW]] +; + %pow = tail call afn <2 x float> @_Z3powDv2_fS_(<2 x float> %x, <2 x float> ) + ret <2 x float> %pow +} + +define <2 x float> @test_pow_afn_v2f32_neg8.0(<2 x float> %x) { +; CHECK-LABEL: define <2 x float> @test_pow_afn_v2f32_neg8.0 +; CHECK-SAME: (<2 x float> [[X:%.*]]) { +; CHECK-NEXT: [[POW:%.*]] = tail call afn <2 x float> @_Z4pownDv2_fDv2_i(<2 x float> [[X]], <2 x i32> splat (i32 -8)) +; CHECK-NEXT: ret <2 x float> [[POW]] +; + %pow = tail call afn <2 x float> @_Z3powDv2_fS_(<2 x float> %x, <2 x float> ) + ret <2 x float> %pow +} + +define <2 x float> @test_pow_afn_v2f32_plus_minus_8.0(<2 x float> %x) { +; CHECK-LABEL: define <2 x float> @test_pow_afn_v2f32_plus_minus_8.0 +; CHECK-SAME: (<2 x float> [[X:%.*]]) { +; CHECK-NEXT: [[POW:%.*]] = tail call afn <2 x float> @_Z4pownDv2_fDv2_i(<2 x float> [[X]], <2 x i32> ) +; CHECK-NEXT: ret <2 x float> [[POW]] +; + %pow = tail call afn <2 x float> @_Z3powDv2_fS_(<2 x float> %x, <2 x float> ) + ret <2 x float> %pow +} + +define float @test_pow_afn_f32_12.0(float %x) { +; CHECK-LABEL: define float @test_pow_afn_f32_12.0 +; CHECK-SAME: (float [[X:%.*]]) { +; CHECK-NEXT: [[POW:%.*]] = tail call afn float @_Z4pownfi(float [[X]], i32 12) +; CHECK-NEXT: ret float [[POW]] +; + %pow = tail call afn float @_Z3powff(float %x, float 12.0) + ret float %pow +} + +define float @test_pow_afn_f32_neg12.0(float %x) { +; CHECK-LABEL: define float @test_pow_afn_f32_neg12.0 +; CHECK-SAME: (float [[X:%.*]]) { +; CHECK-NEXT: [[POW:%.*]] = tail call afn float @_Z4pownfi(float [[X]], i32 -12) +; CHECK-NEXT: ret float [[POW]] +; + %pow = tail call afn float @_Z3powff(float %x, float -12.0) + ret float %pow +} + +define <2 x float> @test_pow_afn_v2f32_12.0(<2 x float> %x) { +; CHECK-LABEL: define <2 x float> @test_pow_afn_v2f32_12.0 +; CHECK-SAME: (<2 x float> [[X:%.*]]) { +; CHECK-NEXT: [[POW:%.*]] = tail call afn <2 x float> @_Z4pownDv2_fDv2_i(<2 x float> [[X]], <2 x i32> splat (i32 12)) +; CHECK-NEXT: ret <2 x float> [[POW]] +; + %pow = tail call afn <2 x float> @_Z3powDv2_fS_(<2 x float> %x, <2 x float> ) + ret <2 x float> %pow +} + +define <2 x float> @test_pow_afn_v2f32_neg12.0(<2 x float> %x) { +; CHECK-LABEL: define <2 x float> @test_pow_afn_v2f32_neg12.0 +; CHECK-SAME: (<2 x float> [[X:%.*]]) { +; CHECK-NEXT: [[POW:%.*]] = tail call afn <2 x float> @_Z4pownDv2_fDv2_i(<2 x float> [[X]], <2 x i32> splat (i32 -12)) +; CHECK-NEXT: ret <2 x float> [[POW]] +; + %pow = tail call afn <2 x float> @_Z3powDv2_fS_(<2 x float> %x, <2 x float> ) + ret <2 x float> %pow +} + +define <2 x float> @test_pow_afn_v2f32_plus_minus_12.0(<2 x float> %x) { +; CHECK-LABEL: define <2 x float> @test_pow_afn_v2f32_plus_minus_12.0 +; CHECK-SAME: (<2 x float> [[X:%.*]]) { +; CHECK-NEXT: [[POW:%.*]] = tail call afn <2 x float> @_Z4pownDv2_fDv2_i(<2 x float> [[X]], <2 x i32> ) +; CHECK-NEXT: ret <2 x float> [[POW]] +; + %pow = tail call afn <2 x float> @_Z3powDv2_fS_(<2 x float> %x, <2 x float> ) + ret <2 x float> %pow +} + +define float @test_pow_afn_f32_13.0(float %x) { +; CHECK-LABEL: define float @test_pow_afn_f32_13.0 +; CHECK-SAME: (float [[X:%.*]]) { +; CHECK-NEXT: [[POW:%.*]] = tail call afn float @_Z4pownfi(float [[X]], i32 13) +; 
CHECK-NEXT: ret float [[POW]] +; + %pow = tail call afn float @_Z3powff(float %x, float 13.0) + ret float %pow +} + +define float @test_pow_afn_f32_neg13.0(float %x) { +; CHECK-LABEL: define float @test_pow_afn_f32_neg13.0 +; CHECK-SAME: (float [[X:%.*]]) { +; CHECK-NEXT: [[POW:%.*]] = tail call afn float @_Z4pownfi(float [[X]], i32 -13) +; CHECK-NEXT: ret float [[POW]] +; + %pow = tail call afn float @_Z3powff(float %x, float -13.0) + ret float %pow +} + +define <2 x float> @test_pow_afn_v2f32_13.0(<2 x float> %x) { +; CHECK-LABEL: define <2 x float> @test_pow_afn_v2f32_13.0 +; CHECK-SAME: (<2 x float> [[X:%.*]]) { +; CHECK-NEXT: [[POW:%.*]] = tail call afn <2 x float> @_Z4pownDv2_fDv2_i(<2 x float> [[X]], <2 x i32> splat (i32 13)) +; CHECK-NEXT: ret <2 x float> [[POW]] +; + %pow = tail call afn <2 x float> @_Z3powDv2_fS_(<2 x float> %x, <2 x float> ) + ret <2 x float> %pow +} + +define <2 x float> @test_pow_afn_v2f32_neg13.0(<2 x float> %x) { +; CHECK-LABEL: define <2 x float> @test_pow_afn_v2f32_neg13.0 +; CHECK-SAME: (<2 x float> [[X:%.*]]) { +; CHECK-NEXT: [[POW:%.*]] = tail call afn <2 x float> @_Z4pownDv2_fDv2_i(<2 x float> [[X]], <2 x i32> splat (i32 -13)) +; CHECK-NEXT: ret <2 x float> [[POW]] +; + %pow = tail call afn <2 x float> @_Z3powDv2_fS_(<2 x float> %x, <2 x float> ) + ret <2 x float> %pow +} + +define <2 x float> @test_pow_afn_v2f32_13.0_15.0(<2 x float> %x) { +; CHECK-LABEL: define <2 x float> @test_pow_afn_v2f32_13.0_15.0 +; CHECK-SAME: (<2 x float> [[X:%.*]]) { +; CHECK-NEXT: [[POW:%.*]] = tail call afn <2 x float> @_Z4pownDv2_fDv2_i(<2 x float> [[X]], <2 x i32> ) +; CHECK-NEXT: ret <2 x float> [[POW]] +; + %pow = tail call afn <2 x float> @_Z3powDv2_fS_(<2 x float> %x, <2 x float> ) + ret <2 x float> %pow +} + +define <2 x float> @test_pow_afn_v2f32_13.0_14.0(<2 x float> %x) { +; CHECK-LABEL: define <2 x float> @test_pow_afn_v2f32_13.0_14.0 +; CHECK-SAME: (<2 x float> [[X:%.*]]) { +; CHECK-NEXT: [[POW:%.*]] = tail call afn <2 x float> @_Z4pownDv2_fDv2_i(<2 x float> [[X]], <2 x i32> ) +; CHECK-NEXT: ret <2 x float> [[POW]] +; + %pow = tail call afn <2 x float> @_Z3powDv2_fS_(<2 x float> %x, <2 x float> ) + ret <2 x float> %pow +} + +define <2 x float> @test_pow_afn_v2f32_14.0_16.0(<2 x float> %x) { +; CHECK-LABEL: define <2 x float> @test_pow_afn_v2f32_14.0_16.0 +; CHECK-SAME: (<2 x float> [[X:%.*]]) { +; CHECK-NEXT: [[POW:%.*]] = tail call afn <2 x float> @_Z4pownDv2_fDv2_i(<2 x float> [[X]], <2 x i32> ) +; CHECK-NEXT: ret <2 x float> [[POW]] +; + %pow = tail call afn <2 x float> @_Z3powDv2_fS_(<2 x float> %x, <2 x float> ) + ret <2 x float> %pow +} + +define <2 x float> @test_pow_afn_v2f32_plus_minus_13.0(<2 x float> %x) { +; CHECK-LABEL: define <2 x float> @test_pow_afn_v2f32_plus_minus_13.0 +; CHECK-SAME: (<2 x float> [[X:%.*]]) { +; CHECK-NEXT: [[POW:%.*]] = tail call afn <2 x float> @_Z4pownDv2_fDv2_i(<2 x float> [[X]], <2 x i32> ) +; CHECK-NEXT: ret <2 x float> [[POW]] +; + %pow = tail call afn <2 x float> @_Z3powDv2_fS_(<2 x float> %x, <2 x float> ) + ret <2 x float> %pow +} + +define <2 x float> @test_pow_afn_v2f32_plus_minus_13.0_minus_14.0(<2 x float> %x) { +; CHECK-LABEL: define <2 x float> @test_pow_afn_v2f32_plus_minus_13.0_minus_14.0 +; CHECK-SAME: (<2 x float> [[X:%.*]]) { +; CHECK-NEXT: [[POW:%.*]] = tail call afn <2 x float> @_Z4pownDv2_fDv2_i(<2 x float> [[X]], <2 x i32> ) +; CHECK-NEXT: ret <2 x float> [[POW]] +; + %pow = tail call afn <2 x float> @_Z3powDv2_fS_(<2 x float> %x, <2 x float> ) + ret <2 x float> %pow +} + +define float 
@test_pow_afn_f32_nnan_x_known_positive(float nofpclass(ninf nnorm nsub) %x, float %y) { +; CHECK-LABEL: define float @test_pow_afn_f32_nnan_x_known_positive +; CHECK-SAME: (float nofpclass(ninf nsub nnorm) [[X:%.*]], float [[Y:%.*]]) { +; CHECK-NEXT: [[POW:%.*]] = tail call nnan afn float @_Z4powrff(float [[X]], float [[Y]]) +; CHECK-NEXT: ret float [[POW]] +; + %pow = tail call afn nnan float @_Z3powff(float %x, float %y) + ret float %pow +} + +define float @test_pow_afn_f32_nnan_ninf_x_known_positive(float nofpclass(ninf nnorm nsub) %x, float %y) { +; CHECK-LABEL: define float @test_pow_afn_f32_nnan_ninf_x_known_positive +; CHECK-SAME: (float nofpclass(ninf nsub nnorm) [[X:%.*]], float [[Y:%.*]]) { +; CHECK-NEXT: [[__LOG2:%.*]] = call nnan ninf afn float @llvm.log2.f32(float [[X]]) +; CHECK-NEXT: [[__YLOGX:%.*]] = fmul nnan ninf afn float [[Y]], [[__LOG2]] +; CHECK-NEXT: [[__EXP2:%.*]] = call nnan ninf afn float @llvm.exp2.f32(float [[__YLOGX]]) +; CHECK-NEXT: ret float [[__EXP2]] +; + %pow = tail call afn nnan ninf float @_Z3powff(float %x, float %y) + ret float %pow +} + +define <2 x float> @test_pow_afn_v2f32_nnan_x_known_positive(<2 x float> nofpclass(ninf nnorm nsub) %x, <2 x float> %y) { +; CHECK-LABEL: define <2 x float> @test_pow_afn_v2f32_nnan_x_known_positive +; CHECK-SAME: (<2 x float> nofpclass(ninf nsub nnorm) [[X:%.*]], <2 x float> [[Y:%.*]]) { +; CHECK-NEXT: [[POW:%.*]] = tail call nnan afn <2 x float> @_Z4powrDv2_fS_(<2 x float> [[X]], <2 x float> [[Y]]) +; CHECK-NEXT: ret <2 x float> [[POW]] +; + %pow = tail call afn nnan <2 x float> @_Z3powDv2_fS_(<2 x float> %x, <2 x float> %y) + ret <2 x float> %pow +} + +define <2 x float> @test_pow_afn_v2f32_nnan_ninf_x_known_positive(<2 x float> nofpclass(ninf nnorm nsub) %x, <2 x float> %y) { +; CHECK-LABEL: define <2 x float> @test_pow_afn_v2f32_nnan_ninf_x_known_positive +; CHECK-SAME: (<2 x float> nofpclass(ninf nsub nnorm) [[X:%.*]], <2 x float> [[Y:%.*]]) { +; CHECK-NEXT: [[__LOG2:%.*]] = call nnan ninf afn <2 x float> @llvm.log2.v2f32(<2 x float> [[X]]) +; CHECK-NEXT: [[__YLOGX:%.*]] = fmul nnan ninf afn <2 x float> [[Y]], [[__LOG2]] +; CHECK-NEXT: [[__EXP2:%.*]] = call nnan ninf afn <2 x float> @llvm.exp2.v2f32(<2 x float> [[__YLOGX]]) +; CHECK-NEXT: ret <2 x float> [[__EXP2]] +; + %pow = tail call afn nnan ninf <2 x float> @_Z3powDv2_fS_(<2 x float> %x, <2 x float> %y) + ret <2 x float> %pow +} + +define float @test_pow_f32_x_known_positive(float nofpclass(ninf nnorm nsub) %x, float %y) { +; CHECK-LABEL: define float @test_pow_f32_x_known_positive +; CHECK-SAME: (float nofpclass(ninf nsub nnorm) [[X:%.*]], float [[Y:%.*]]) { +; CHECK-NEXT: [[POW:%.*]] = tail call float @_Z4powrff(float [[X]], float [[Y]]) +; CHECK-NEXT: ret float [[POW]] +; + %pow = tail call float @_Z3powff(float %x, float %y) ret float %pow } -define double @test_pow_fast_f64__integral_y(double %x, i32 %y.i) { -; CHECK-LABEL: test_pow_fast_f64__integral_y: -; CHECK: ; %bb.0: -; CHECK-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; CHECK-NEXT: s_mov_b32 s16, s33 -; CHECK-NEXT: s_mov_b32 s33, s32 -; CHECK-NEXT: s_or_saveexec_b64 s[18:19], -1 -; CHECK-NEXT: buffer_store_dword v43, off, s[0:3], s33 offset:12 ; 4-byte Folded Spill -; CHECK-NEXT: s_mov_b64 exec, s[18:19] -; CHECK-NEXT: v_writelane_b32 v43, s16, 14 -; CHECK-NEXT: v_writelane_b32 v43, s30, 0 -; CHECK-NEXT: v_writelane_b32 v43, s31, 1 -; CHECK-NEXT: v_writelane_b32 v43, s34, 2 -; CHECK-NEXT: v_writelane_b32 v43, s35, 3 -; CHECK-NEXT: v_writelane_b32 v43, s36, 4 -; CHECK-NEXT: v_writelane_b32 
v43, s37, 5 -; CHECK-NEXT: v_writelane_b32 v43, s38, 6 -; CHECK-NEXT: v_writelane_b32 v43, s39, 7 -; CHECK-NEXT: s_addk_i32 s32, 0x800 -; CHECK-NEXT: v_writelane_b32 v43, s48, 8 -; CHECK-NEXT: v_writelane_b32 v43, s49, 9 -; CHECK-NEXT: s_mov_b64 s[48:49], s[4:5] -; CHECK-NEXT: s_getpc_b64 s[4:5] -; CHECK-NEXT: s_add_u32 s4, s4, _Z4log2d@gotpcrel32@lo+4 -; CHECK-NEXT: s_addc_u32 s5, s5, _Z4log2d@gotpcrel32@hi+12 -; CHECK-NEXT: s_load_dwordx2 s[16:17], s[4:5], 0x0 -; CHECK-NEXT: v_writelane_b32 v43, s50, 10 -; CHECK-NEXT: buffer_store_dword v40, off, s[0:3], s33 offset:8 ; 4-byte Folded Spill -; CHECK-NEXT: buffer_store_dword v41, off, s[0:3], s33 offset:4 ; 4-byte Folded Spill -; CHECK-NEXT: buffer_store_dword v42, off, s[0:3], s33 ; 4-byte Folded Spill -; CHECK-NEXT: v_writelane_b32 v43, s51, 11 -; CHECK-NEXT: v_mov_b32_e32 v42, v1 -; CHECK-NEXT: v_writelane_b32 v43, s52, 12 -; CHECK-NEXT: v_and_b32_e32 v1, 0x7fffffff, v42 -; CHECK-NEXT: s_mov_b64 s[4:5], s[48:49] -; CHECK-NEXT: v_writelane_b32 v43, s53, 13 -; CHECK-NEXT: v_mov_b32_e32 v40, v31 -; CHECK-NEXT: v_mov_b32_e32 v41, v2 -; CHECK-NEXT: s_mov_b32 s50, s15 -; CHECK-NEXT: s_mov_b32 s51, s14 -; CHECK-NEXT: s_mov_b32 s52, s13 -; CHECK-NEXT: s_mov_b32 s53, s12 -; CHECK-NEXT: s_mov_b64 s[34:35], s[10:11] -; CHECK-NEXT: s_mov_b64 s[36:37], s[8:9] -; CHECK-NEXT: s_mov_b64 s[38:39], s[6:7] -; CHECK-NEXT: s_waitcnt lgkmcnt(0) -; CHECK-NEXT: s_swappc_b64 s[30:31], s[16:17] -; CHECK-NEXT: v_cvt_f64_i32_e32 v[2:3], v41 -; CHECK-NEXT: s_getpc_b64 s[4:5] -; CHECK-NEXT: s_add_u32 s4, s4, _Z4exp2d@gotpcrel32@lo+4 -; CHECK-NEXT: s_addc_u32 s5, s5, _Z4exp2d@gotpcrel32@hi+12 -; CHECK-NEXT: s_load_dwordx2 s[16:17], s[4:5], 0x0 -; CHECK-NEXT: s_mov_b64 s[4:5], s[48:49] -; CHECK-NEXT: v_mul_f64 v[0:1], v[0:1], v[2:3] -; CHECK-NEXT: s_mov_b64 s[6:7], s[38:39] -; CHECK-NEXT: s_mov_b64 s[8:9], s[36:37] -; CHECK-NEXT: s_mov_b64 s[10:11], s[34:35] -; CHECK-NEXT: s_mov_b32 s12, s53 -; CHECK-NEXT: s_mov_b32 s13, s52 -; CHECK-NEXT: s_mov_b32 s14, s51 -; CHECK-NEXT: s_mov_b32 s15, s50 -; CHECK-NEXT: v_mov_b32_e32 v31, v40 -; CHECK-NEXT: s_waitcnt lgkmcnt(0) -; CHECK-NEXT: s_swappc_b64 s[30:31], s[16:17] -; CHECK-NEXT: v_lshlrev_b32_e32 v2, 31, v41 -; CHECK-NEXT: v_and_b32_e32 v2, v2, v42 -; CHECK-NEXT: buffer_load_dword v42, off, s[0:3], s33 ; 4-byte Folded Reload -; CHECK-NEXT: buffer_load_dword v41, off, s[0:3], s33 offset:4 ; 4-byte Folded Reload -; CHECK-NEXT: buffer_load_dword v40, off, s[0:3], s33 offset:8 ; 4-byte Folded Reload -; CHECK-NEXT: v_or_b32_e32 v1, v2, v1 -; CHECK-NEXT: v_readlane_b32 s53, v43, 13 -; CHECK-NEXT: v_readlane_b32 s52, v43, 12 -; CHECK-NEXT: v_readlane_b32 s51, v43, 11 -; CHECK-NEXT: v_readlane_b32 s50, v43, 10 -; CHECK-NEXT: v_readlane_b32 s49, v43, 9 -; CHECK-NEXT: v_readlane_b32 s48, v43, 8 -; CHECK-NEXT: v_readlane_b32 s39, v43, 7 -; CHECK-NEXT: v_readlane_b32 s38, v43, 6 -; CHECK-NEXT: v_readlane_b32 s37, v43, 5 -; CHECK-NEXT: v_readlane_b32 s36, v43, 4 -; CHECK-NEXT: v_readlane_b32 s35, v43, 3 -; CHECK-NEXT: v_readlane_b32 s34, v43, 2 -; CHECK-NEXT: v_readlane_b32 s31, v43, 1 -; CHECK-NEXT: v_readlane_b32 s30, v43, 0 -; CHECK-NEXT: s_mov_b32 s32, s33 -; CHECK-NEXT: v_readlane_b32 s4, v43, 14 -; CHECK-NEXT: s_or_saveexec_b64 s[6:7], -1 -; CHECK-NEXT: buffer_load_dword v43, off, s[0:3], s33 offset:12 ; 4-byte Folded Reload -; CHECK-NEXT: s_mov_b64 exec, s[6:7] -; CHECK-NEXT: s_mov_b32 s33, s4 -; CHECK-NEXT: s_waitcnt vmcnt(0) -; CHECK-NEXT: s_setpc_b64 s[30:31] - %y = sitofp i32 %y.i to double - %pow = tail call fast double 
@_Z3powdd(double %x, double %y) +define float @test_pow_afn_f32_x_known_positive(float nofpclass(ninf nnorm nsub) %x, float %y) { +; CHECK-LABEL: define float @test_pow_afn_f32_x_known_positive +; CHECK-SAME: (float nofpclass(ninf nsub nnorm) [[X:%.*]], float [[Y:%.*]]) { +; CHECK-NEXT: [[POW:%.*]] = tail call afn float @_Z4powrff(float [[X]], float [[Y]]) +; CHECK-NEXT: ret float [[POW]] +; + %pow = tail call afn float @_Z3powff(float %x, float %y) + ret float %pow +} + +define <2 x float> @test_pow_v2f32_x_known_positive(<2 x float> nofpclass(ninf nnorm nsub) %x, <2 x float> %y) { +; CHECK-LABEL: define <2 x float> @test_pow_v2f32_x_known_positive +; CHECK-SAME: (<2 x float> nofpclass(ninf nsub nnorm) [[X:%.*]], <2 x float> [[Y:%.*]]) { +; CHECK-NEXT: [[POW:%.*]] = tail call <2 x float> @_Z4powrDv2_fS_(<2 x float> [[X]], <2 x float> [[Y]]) +; CHECK-NEXT: ret <2 x float> [[POW]] +; + %pow = tail call <2 x float> @_Z3powDv2_fS_(<2 x float> %x, <2 x float> %y) + ret <2 x float> %pow +} + +define <2 x float> @test_pow_afn_v2f32_x_known_positive(<2 x float> nofpclass(ninf nnorm nsub) %x, <2 x float> %y) { +; CHECK-LABEL: define <2 x float> @test_pow_afn_v2f32_x_known_positive +; CHECK-SAME: (<2 x float> nofpclass(ninf nsub nnorm) [[X:%.*]], <2 x float> [[Y:%.*]]) { +; CHECK-NEXT: [[POW:%.*]] = tail call afn <2 x float> @_Z4powrDv2_fS_(<2 x float> [[X]], <2 x float> [[Y]]) +; CHECK-NEXT: ret <2 x float> [[POW]] +; + %pow = tail call afn <2 x float> @_Z3powDv2_fS_(<2 x float> %x, <2 x float> %y) + ret <2 x float> %pow +} + +define double @test_pow_afn_f64_nnan_x_known_positive(double nofpclass(ninf nnorm nsub) %x, double %y) { +; CHECK-LABEL: define double @test_pow_afn_f64_nnan_x_known_positive +; CHECK-SAME: (double nofpclass(ninf nsub nnorm) [[X:%.*]], double [[Y:%.*]]) { +; CHECK-NEXT: [[POW:%.*]] = tail call nnan afn double @_Z4powrdd(double [[X]], double [[Y]]) +; CHECK-NEXT: ret double [[POW]] +; + %pow = tail call afn nnan double @_Z3powdd(double %x, double %y) ret double %pow } -; -------------------------------------------------------------------- -; test powr -; -------------------------------------------------------------------- - -define half @test_powr_fast_f16(half %x, half %y) { -; CHECK-LABEL: test_powr_fast_f16: -; CHECK: ; %bb.0: -; CHECK-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; CHECK-NEXT: v_log_f16_e32 v0, v0 -; CHECK-NEXT: v_mul_f16_e32 v0, v1, v0 -; CHECK-NEXT: v_exp_f16_e32 v0, v0 -; CHECK-NEXT: s_setpc_b64 s[30:31] - %powr = tail call fast half @_Z4powrDhDh(half %x, half %y) - ret half %powr -} - -define float @test_powr_fast_f32(float %x, float %y) { -; CHECK-LABEL: test_powr_fast_f32: -; CHECK: ; %bb.0: -; CHECK-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; CHECK-NEXT: s_mov_b32 s4, 0x800000 -; CHECK-NEXT: v_cmp_gt_f32_e32 vcc, s4, v0 -; CHECK-NEXT: v_cndmask_b32_e64 v3, 0, 32, vcc -; CHECK-NEXT: v_ldexp_f32 v0, v0, v3 -; CHECK-NEXT: v_log_f32_e32 v0, v0 -; CHECK-NEXT: v_mov_b32_e32 v2, 0x42000000 -; CHECK-NEXT: v_cndmask_b32_e32 v2, 0, v2, vcc -; CHECK-NEXT: s_mov_b32 s4, 0xc2fc0000 -; CHECK-NEXT: v_sub_f32_e32 v0, v0, v2 -; CHECK-NEXT: v_mul_f32_e32 v2, v1, v0 -; CHECK-NEXT: v_mov_b32_e32 v3, 0x42800000 -; CHECK-NEXT: v_cmp_gt_f32_e32 vcc, s4, v2 -; CHECK-NEXT: v_cndmask_b32_e32 v2, 0, v3, vcc -; CHECK-NEXT: v_fma_f32 v0, v1, v0, v2 -; CHECK-NEXT: v_exp_f32_e32 v0, v0 -; CHECK-NEXT: v_not_b32_e32 v1, 63 -; CHECK-NEXT: v_cndmask_b32_e32 v1, 0, v1, vcc -; CHECK-NEXT: v_ldexp_f32 v0, v0, v1 -; CHECK-NEXT: s_setpc_b64 s[30:31] - %powr = tail call fast float 
@_Z4powrff(float %x, float %y) - ret float %powr -} - -define double @test_powr_fast_f64(double %x, double %y) { -; CHECK-LABEL: test_powr_fast_f64: -; CHECK: ; %bb.0: -; CHECK-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; CHECK-NEXT: s_mov_b32 s16, s33 -; CHECK-NEXT: s_mov_b32 s33, s32 -; CHECK-NEXT: s_or_saveexec_b64 s[18:19], -1 -; CHECK-NEXT: buffer_store_dword v43, off, s[0:3], s33 offset:12 ; 4-byte Folded Spill -; CHECK-NEXT: s_mov_b64 exec, s[18:19] -; CHECK-NEXT: v_writelane_b32 v43, s16, 14 -; CHECK-NEXT: v_writelane_b32 v43, s30, 0 -; CHECK-NEXT: v_writelane_b32 v43, s31, 1 -; CHECK-NEXT: v_writelane_b32 v43, s34, 2 -; CHECK-NEXT: v_writelane_b32 v43, s35, 3 -; CHECK-NEXT: v_writelane_b32 v43, s36, 4 -; CHECK-NEXT: v_writelane_b32 v43, s37, 5 -; CHECK-NEXT: v_writelane_b32 v43, s38, 6 -; CHECK-NEXT: v_writelane_b32 v43, s39, 7 -; CHECK-NEXT: s_addk_i32 s32, 0x800 -; CHECK-NEXT: v_writelane_b32 v43, s48, 8 -; CHECK-NEXT: v_writelane_b32 v43, s49, 9 -; CHECK-NEXT: s_mov_b64 s[48:49], s[4:5] -; CHECK-NEXT: s_getpc_b64 s[4:5] -; CHECK-NEXT: s_add_u32 s4, s4, _Z4log2d@gotpcrel32@lo+4 -; CHECK-NEXT: s_addc_u32 s5, s5, _Z4log2d@gotpcrel32@hi+12 -; CHECK-NEXT: s_load_dwordx2 s[16:17], s[4:5], 0x0 -; CHECK-NEXT: v_writelane_b32 v43, s50, 10 -; CHECK-NEXT: v_writelane_b32 v43, s51, 11 -; CHECK-NEXT: v_writelane_b32 v43, s52, 12 -; CHECK-NEXT: s_mov_b64 s[4:5], s[48:49] -; CHECK-NEXT: buffer_store_dword v40, off, s[0:3], s33 offset:8 ; 4-byte Folded Spill -; CHECK-NEXT: buffer_store_dword v41, off, s[0:3], s33 offset:4 ; 4-byte Folded Spill -; CHECK-NEXT: buffer_store_dword v42, off, s[0:3], s33 ; 4-byte Folded Spill -; CHECK-NEXT: v_writelane_b32 v43, s53, 13 -; CHECK-NEXT: v_mov_b32_e32 v42, v31 -; CHECK-NEXT: v_mov_b32_e32 v41, v3 -; CHECK-NEXT: v_mov_b32_e32 v40, v2 -; CHECK-NEXT: s_mov_b32 s50, s15 -; CHECK-NEXT: s_mov_b32 s51, s14 -; CHECK-NEXT: s_mov_b32 s52, s13 -; CHECK-NEXT: s_mov_b32 s53, s12 -; CHECK-NEXT: s_mov_b64 s[34:35], s[10:11] -; CHECK-NEXT: s_mov_b64 s[36:37], s[8:9] -; CHECK-NEXT: s_mov_b64 s[38:39], s[6:7] -; CHECK-NEXT: s_waitcnt lgkmcnt(0) -; CHECK-NEXT: s_swappc_b64 s[30:31], s[16:17] -; CHECK-NEXT: v_mul_f64 v[0:1], v[40:41], v[0:1] -; CHECK-NEXT: s_getpc_b64 s[4:5] -; CHECK-NEXT: s_add_u32 s4, s4, _Z4exp2d@gotpcrel32@lo+4 -; CHECK-NEXT: s_addc_u32 s5, s5, _Z4exp2d@gotpcrel32@hi+12 -; CHECK-NEXT: s_load_dwordx2 s[16:17], s[4:5], 0x0 -; CHECK-NEXT: s_mov_b64 s[4:5], s[48:49] -; CHECK-NEXT: s_mov_b64 s[6:7], s[38:39] -; CHECK-NEXT: s_mov_b64 s[8:9], s[36:37] -; CHECK-NEXT: s_mov_b64 s[10:11], s[34:35] -; CHECK-NEXT: s_mov_b32 s12, s53 -; CHECK-NEXT: s_mov_b32 s13, s52 -; CHECK-NEXT: s_mov_b32 s14, s51 -; CHECK-NEXT: s_mov_b32 s15, s50 -; CHECK-NEXT: v_mov_b32_e32 v31, v42 -; CHECK-NEXT: s_waitcnt lgkmcnt(0) -; CHECK-NEXT: s_swappc_b64 s[30:31], s[16:17] -; CHECK-NEXT: buffer_load_dword v42, off, s[0:3], s33 ; 4-byte Folded Reload -; CHECK-NEXT: buffer_load_dword v41, off, s[0:3], s33 offset:4 ; 4-byte Folded Reload -; CHECK-NEXT: buffer_load_dword v40, off, s[0:3], s33 offset:8 ; 4-byte Folded Reload -; CHECK-NEXT: v_readlane_b32 s53, v43, 13 -; CHECK-NEXT: v_readlane_b32 s52, v43, 12 -; CHECK-NEXT: v_readlane_b32 s51, v43, 11 -; CHECK-NEXT: v_readlane_b32 s50, v43, 10 -; CHECK-NEXT: v_readlane_b32 s49, v43, 9 -; CHECK-NEXT: v_readlane_b32 s48, v43, 8 -; CHECK-NEXT: v_readlane_b32 s39, v43, 7 -; CHECK-NEXT: v_readlane_b32 s38, v43, 6 -; CHECK-NEXT: v_readlane_b32 s37, v43, 5 -; CHECK-NEXT: v_readlane_b32 s36, v43, 4 -; CHECK-NEXT: v_readlane_b32 s35, v43, 3 -; 
CHECK-NEXT: v_readlane_b32 s34, v43, 2 -; CHECK-NEXT: v_readlane_b32 s31, v43, 1 -; CHECK-NEXT: v_readlane_b32 s30, v43, 0 -; CHECK-NEXT: s_mov_b32 s32, s33 -; CHECK-NEXT: v_readlane_b32 s4, v43, 14 -; CHECK-NEXT: s_or_saveexec_b64 s[6:7], -1 -; CHECK-NEXT: buffer_load_dword v43, off, s[0:3], s33 offset:12 ; 4-byte Folded Reload -; CHECK-NEXT: s_mov_b64 exec, s[6:7] -; CHECK-NEXT: s_mov_b32 s33, s4 -; CHECK-NEXT: s_waitcnt vmcnt(0) -; CHECK-NEXT: s_setpc_b64 s[30:31] - %powr = tail call fast double @_Z4powrdd(double %x, double %y) - ret double %powr -} - -; -------------------------------------------------------------------- -; test pown -; -------------------------------------------------------------------- - -define half @test_pown_fast_f16(half %x, i32 %y) { -; CHECK-LABEL: test_pown_fast_f16: -; CHECK: ; %bb.0: -; CHECK-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; CHECK-NEXT: v_cvt_f32_i32_e32 v2, v1 -; CHECK-NEXT: v_log_f16_e64 v3, |v0| -; CHECK-NEXT: v_lshlrev_b16_e32 v1, 15, v1 -; CHECK-NEXT: v_and_b32_e32 v0, v1, v0 -; CHECK-NEXT: v_cvt_f16_f32_e32 v2, v2 -; CHECK-NEXT: v_mul_f16_e32 v2, v3, v2 -; CHECK-NEXT: v_exp_f16_e32 v2, v2 -; CHECK-NEXT: v_or_b32_e32 v0, v0, v2 -; CHECK-NEXT: s_setpc_b64 s[30:31] - %call = tail call fast half @_Z4pownDhi(half %x, i32 %y) - ret half %call -} - -define float @test_pown_fast_f32(float %x, i32 %y) { -; CHECK-LABEL: test_pown_fast_f32: -; CHECK: ; %bb.0: -; CHECK-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; CHECK-NEXT: s_mov_b32 s4, 0x800000 -; CHECK-NEXT: v_cmp_lt_f32_e64 vcc, |v0|, s4 -; CHECK-NEXT: v_cndmask_b32_e64 v3, 0, 32, vcc -; CHECK-NEXT: v_ldexp_f32 v3, |v0|, v3 -; CHECK-NEXT: v_log_f32_e32 v3, v3 -; CHECK-NEXT: v_cvt_f32_i32_e32 v4, v1 -; CHECK-NEXT: v_mov_b32_e32 v2, 0x42000000 -; CHECK-NEXT: v_cndmask_b32_e32 v2, 0, v2, vcc -; CHECK-NEXT: v_sub_f32_e32 v2, v3, v2 -; CHECK-NEXT: v_mul_f32_e32 v3, v2, v4 -; CHECK-NEXT: s_mov_b32 s4, 0xc2fc0000 -; CHECK-NEXT: v_mov_b32_e32 v5, 0x42800000 -; CHECK-NEXT: v_cmp_gt_f32_e32 vcc, s4, v3 -; CHECK-NEXT: v_cndmask_b32_e32 v3, 0, v5, vcc -; CHECK-NEXT: v_fma_f32 v2, v2, v4, v3 -; CHECK-NEXT: v_exp_f32_e32 v2, v2 -; CHECK-NEXT: v_not_b32_e32 v3, 63 -; CHECK-NEXT: v_cndmask_b32_e32 v3, 0, v3, vcc -; CHECK-NEXT: v_lshlrev_b32_e32 v1, 31, v1 -; CHECK-NEXT: v_ldexp_f32 v2, v2, v3 -; CHECK-NEXT: v_and_or_b32 v0, v1, v0, v2 -; CHECK-NEXT: s_setpc_b64 s[30:31] - %call = tail call fast float @_Z4pownfi(float %x, i32 %y) - ret float %call -} - -define double @test_pown_fast_f64(double %x, i32 %y) { -; CHECK-LABEL: test_pown_fast_f64: -; CHECK: ; %bb.0: -; CHECK-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; CHECK-NEXT: s_mov_b32 s16, s33 -; CHECK-NEXT: s_mov_b32 s33, s32 -; CHECK-NEXT: s_or_saveexec_b64 s[18:19], -1 -; CHECK-NEXT: buffer_store_dword v43, off, s[0:3], s33 offset:12 ; 4-byte Folded Spill -; CHECK-NEXT: s_mov_b64 exec, s[18:19] -; CHECK-NEXT: v_writelane_b32 v43, s16, 14 -; CHECK-NEXT: v_writelane_b32 v43, s30, 0 -; CHECK-NEXT: v_writelane_b32 v43, s31, 1 -; CHECK-NEXT: v_writelane_b32 v43, s34, 2 -; CHECK-NEXT: v_writelane_b32 v43, s35, 3 -; CHECK-NEXT: v_writelane_b32 v43, s36, 4 -; CHECK-NEXT: v_writelane_b32 v43, s37, 5 -; CHECK-NEXT: v_writelane_b32 v43, s38, 6 -; CHECK-NEXT: v_writelane_b32 v43, s39, 7 -; CHECK-NEXT: s_addk_i32 s32, 0x800 -; CHECK-NEXT: v_writelane_b32 v43, s48, 8 -; CHECK-NEXT: v_writelane_b32 v43, s49, 9 -; CHECK-NEXT: s_mov_b64 s[48:49], s[4:5] -; CHECK-NEXT: s_getpc_b64 s[4:5] -; CHECK-NEXT: s_add_u32 s4, s4, _Z4log2d@gotpcrel32@lo+4 -; CHECK-NEXT: 
s_addc_u32 s5, s5, _Z4log2d@gotpcrel32@hi+12 -; CHECK-NEXT: s_load_dwordx2 s[16:17], s[4:5], 0x0 -; CHECK-NEXT: v_writelane_b32 v43, s50, 10 -; CHECK-NEXT: buffer_store_dword v40, off, s[0:3], s33 offset:8 ; 4-byte Folded Spill -; CHECK-NEXT: buffer_store_dword v41, off, s[0:3], s33 offset:4 ; 4-byte Folded Spill -; CHECK-NEXT: buffer_store_dword v42, off, s[0:3], s33 ; 4-byte Folded Spill -; CHECK-NEXT: v_writelane_b32 v43, s51, 11 -; CHECK-NEXT: v_mov_b32_e32 v42, v1 -; CHECK-NEXT: v_writelane_b32 v43, s52, 12 -; CHECK-NEXT: v_and_b32_e32 v1, 0x7fffffff, v42 -; CHECK-NEXT: s_mov_b64 s[4:5], s[48:49] -; CHECK-NEXT: v_writelane_b32 v43, s53, 13 -; CHECK-NEXT: v_mov_b32_e32 v40, v31 -; CHECK-NEXT: v_mov_b32_e32 v41, v2 -; CHECK-NEXT: s_mov_b32 s50, s15 -; CHECK-NEXT: s_mov_b32 s51, s14 -; CHECK-NEXT: s_mov_b32 s52, s13 -; CHECK-NEXT: s_mov_b32 s53, s12 -; CHECK-NEXT: s_mov_b64 s[34:35], s[10:11] -; CHECK-NEXT: s_mov_b64 s[36:37], s[8:9] -; CHECK-NEXT: s_mov_b64 s[38:39], s[6:7] -; CHECK-NEXT: s_waitcnt lgkmcnt(0) -; CHECK-NEXT: s_swappc_b64 s[30:31], s[16:17] -; CHECK-NEXT: v_cvt_f64_i32_e32 v[2:3], v41 -; CHECK-NEXT: s_getpc_b64 s[4:5] -; CHECK-NEXT: s_add_u32 s4, s4, _Z4exp2d@gotpcrel32@lo+4 -; CHECK-NEXT: s_addc_u32 s5, s5, _Z4exp2d@gotpcrel32@hi+12 -; CHECK-NEXT: s_load_dwordx2 s[16:17], s[4:5], 0x0 -; CHECK-NEXT: s_mov_b64 s[4:5], s[48:49] -; CHECK-NEXT: v_mul_f64 v[0:1], v[0:1], v[2:3] -; CHECK-NEXT: s_mov_b64 s[6:7], s[38:39] -; CHECK-NEXT: s_mov_b64 s[8:9], s[36:37] -; CHECK-NEXT: s_mov_b64 s[10:11], s[34:35] -; CHECK-NEXT: s_mov_b32 s12, s53 -; CHECK-NEXT: s_mov_b32 s13, s52 -; CHECK-NEXT: s_mov_b32 s14, s51 -; CHECK-NEXT: s_mov_b32 s15, s50 -; CHECK-NEXT: v_mov_b32_e32 v31, v40 -; CHECK-NEXT: s_waitcnt lgkmcnt(0) -; CHECK-NEXT: s_swappc_b64 s[30:31], s[16:17] -; CHECK-NEXT: v_lshlrev_b32_e32 v2, 31, v41 -; CHECK-NEXT: v_and_b32_e32 v2, v2, v42 -; CHECK-NEXT: buffer_load_dword v42, off, s[0:3], s33 ; 4-byte Folded Reload -; CHECK-NEXT: buffer_load_dword v41, off, s[0:3], s33 offset:4 ; 4-byte Folded Reload -; CHECK-NEXT: buffer_load_dword v40, off, s[0:3], s33 offset:8 ; 4-byte Folded Reload -; CHECK-NEXT: v_or_b32_e32 v1, v2, v1 -; CHECK-NEXT: v_readlane_b32 s53, v43, 13 -; CHECK-NEXT: v_readlane_b32 s52, v43, 12 -; CHECK-NEXT: v_readlane_b32 s51, v43, 11 -; CHECK-NEXT: v_readlane_b32 s50, v43, 10 -; CHECK-NEXT: v_readlane_b32 s49, v43, 9 -; CHECK-NEXT: v_readlane_b32 s48, v43, 8 -; CHECK-NEXT: v_readlane_b32 s39, v43, 7 -; CHECK-NEXT: v_readlane_b32 s38, v43, 6 -; CHECK-NEXT: v_readlane_b32 s37, v43, 5 -; CHECK-NEXT: v_readlane_b32 s36, v43, 4 -; CHECK-NEXT: v_readlane_b32 s35, v43, 3 -; CHECK-NEXT: v_readlane_b32 s34, v43, 2 -; CHECK-NEXT: v_readlane_b32 s31, v43, 1 -; CHECK-NEXT: v_readlane_b32 s30, v43, 0 -; CHECK-NEXT: s_mov_b32 s32, s33 -; CHECK-NEXT: v_readlane_b32 s4, v43, 14 -; CHECK-NEXT: s_or_saveexec_b64 s[6:7], -1 -; CHECK-NEXT: buffer_load_dword v43, off, s[0:3], s33 offset:12 ; 4-byte Folded Reload -; CHECK-NEXT: s_mov_b64 exec, s[6:7] -; CHECK-NEXT: s_mov_b32 s33, s4 -; CHECK-NEXT: s_waitcnt vmcnt(0) -; CHECK-NEXT: s_setpc_b64 s[30:31] - %call = tail call fast double @_Z4powndi(double %x, i32 %y) - ret double %call -} - -define half @test_pown_fast_f16_known_even(half %x, i32 %y.arg) { -; CHECK-LABEL: test_pown_fast_f16_known_even: -; CHECK: ; %bb.0: -; CHECK-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; CHECK-NEXT: v_lshlrev_b32_e32 v1, 1, v1 -; CHECK-NEXT: v_cvt_f32_i32_e32 v1, v1 -; CHECK-NEXT: v_log_f16_e64 v0, |v0| -; CHECK-NEXT: v_cvt_f16_f32_e32 v1, v1 -; 
CHECK-NEXT: v_mul_f16_e32 v0, v0, v1 -; CHECK-NEXT: v_exp_f16_e32 v0, v0 -; CHECK-NEXT: s_setpc_b64 s[30:31] - %y = shl i32 %y.arg, 1 - %call = tail call fast half @_Z4pownDhi(half %x, i32 %y) - ret half %call -} - -define float @test_pown_fast_f32_known_even(float %x, i32 %y.arg) { -; CHECK-LABEL: test_pown_fast_f32_known_even: -; CHECK: ; %bb.0: -; CHECK-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; CHECK-NEXT: s_mov_b32 s4, 0x800000 -; CHECK-NEXT: v_cmp_lt_f32_e64 vcc, |v0|, s4 -; CHECK-NEXT: v_cndmask_b32_e64 v3, 0, 32, vcc -; CHECK-NEXT: v_ldexp_f32 v0, |v0|, v3 -; CHECK-NEXT: v_lshlrev_b32_e32 v1, 1, v1 -; CHECK-NEXT: v_log_f32_e32 v0, v0 -; CHECK-NEXT: v_cvt_f32_i32_e32 v1, v1 -; CHECK-NEXT: v_mov_b32_e32 v2, 0x42000000 -; CHECK-NEXT: v_cndmask_b32_e32 v2, 0, v2, vcc -; CHECK-NEXT: v_sub_f32_e32 v0, v0, v2 -; CHECK-NEXT: v_mul_f32_e32 v2, v0, v1 -; CHECK-NEXT: s_mov_b32 s4, 0xc2fc0000 -; CHECK-NEXT: v_mov_b32_e32 v3, 0x42800000 -; CHECK-NEXT: v_cmp_gt_f32_e32 vcc, s4, v2 -; CHECK-NEXT: v_cndmask_b32_e32 v2, 0, v3, vcc -; CHECK-NEXT: v_fma_f32 v0, v0, v1, v2 -; CHECK-NEXT: v_exp_f32_e32 v0, v0 -; CHECK-NEXT: v_not_b32_e32 v1, 63 -; CHECK-NEXT: v_cndmask_b32_e32 v1, 0, v1, vcc -; CHECK-NEXT: v_ldexp_f32 v0, v0, v1 -; CHECK-NEXT: s_setpc_b64 s[30:31] - %y = shl i32 %y.arg, 1 - %call = tail call fast float @_Z4pownfi(float %x, i32 %y) - ret float %call -} - -define double @test_pown_fast_f64_known_even(double %x, i32 %y.arg) { -; CHECK-LABEL: test_pown_fast_f64_known_even: -; CHECK: ; %bb.0: -; CHECK-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; CHECK-NEXT: s_mov_b32 s16, s33 -; CHECK-NEXT: s_mov_b32 s33, s32 -; CHECK-NEXT: s_or_saveexec_b64 s[18:19], -1 -; CHECK-NEXT: buffer_store_dword v42, off, s[0:3], s33 offset:8 ; 4-byte Folded Spill -; CHECK-NEXT: s_mov_b64 exec, s[18:19] -; CHECK-NEXT: v_writelane_b32 v42, s16, 14 -; CHECK-NEXT: v_writelane_b32 v42, s30, 0 -; CHECK-NEXT: v_writelane_b32 v42, s31, 1 -; CHECK-NEXT: v_writelane_b32 v42, s34, 2 -; CHECK-NEXT: v_writelane_b32 v42, s35, 3 -; CHECK-NEXT: v_writelane_b32 v42, s36, 4 -; CHECK-NEXT: v_writelane_b32 v42, s37, 5 -; CHECK-NEXT: v_writelane_b32 v42, s38, 6 -; CHECK-NEXT: v_writelane_b32 v42, s39, 7 -; CHECK-NEXT: s_addk_i32 s32, 0x400 -; CHECK-NEXT: v_writelane_b32 v42, s48, 8 -; CHECK-NEXT: v_writelane_b32 v42, s49, 9 -; CHECK-NEXT: s_mov_b64 s[48:49], s[4:5] -; CHECK-NEXT: s_getpc_b64 s[4:5] -; CHECK-NEXT: s_add_u32 s4, s4, _Z4log2d@gotpcrel32@lo+4 -; CHECK-NEXT: s_addc_u32 s5, s5, _Z4log2d@gotpcrel32@hi+12 -; CHECK-NEXT: s_load_dwordx2 s[16:17], s[4:5], 0x0 -; CHECK-NEXT: v_writelane_b32 v42, s50, 10 -; CHECK-NEXT: v_writelane_b32 v42, s51, 11 -; CHECK-NEXT: v_writelane_b32 v42, s52, 12 -; CHECK-NEXT: v_and_b32_e32 v1, 0x7fffffff, v1 -; CHECK-NEXT: s_mov_b64 s[4:5], s[48:49] -; CHECK-NEXT: buffer_store_dword v40, off, s[0:3], s33 offset:4 ; 4-byte Folded Spill -; CHECK-NEXT: buffer_store_dword v41, off, s[0:3], s33 ; 4-byte Folded Spill -; CHECK-NEXT: v_writelane_b32 v42, s53, 13 -; CHECK-NEXT: v_mov_b32_e32 v40, v31 -; CHECK-NEXT: s_mov_b32 s50, s15 -; CHECK-NEXT: s_mov_b32 s51, s14 -; CHECK-NEXT: s_mov_b32 s52, s13 -; CHECK-NEXT: s_mov_b32 s53, s12 -; CHECK-NEXT: s_mov_b64 s[34:35], s[10:11] -; CHECK-NEXT: s_mov_b64 s[36:37], s[8:9] -; CHECK-NEXT: s_mov_b64 s[38:39], s[6:7] -; CHECK-NEXT: v_lshlrev_b32_e32 v41, 1, v2 -; CHECK-NEXT: s_waitcnt lgkmcnt(0) -; CHECK-NEXT: s_swappc_b64 s[30:31], s[16:17] -; CHECK-NEXT: v_cvt_f64_i32_e32 v[2:3], v41 -; CHECK-NEXT: s_getpc_b64 s[4:5] -; CHECK-NEXT: s_add_u32 s4, s4, 
_Z4exp2d@gotpcrel32@lo+4 -; CHECK-NEXT: s_addc_u32 s5, s5, _Z4exp2d@gotpcrel32@hi+12 -; CHECK-NEXT: s_load_dwordx2 s[16:17], s[4:5], 0x0 -; CHECK-NEXT: s_mov_b64 s[4:5], s[48:49] -; CHECK-NEXT: v_mul_f64 v[0:1], v[0:1], v[2:3] -; CHECK-NEXT: s_mov_b64 s[6:7], s[38:39] -; CHECK-NEXT: s_mov_b64 s[8:9], s[36:37] -; CHECK-NEXT: s_mov_b64 s[10:11], s[34:35] -; CHECK-NEXT: s_mov_b32 s12, s53 -; CHECK-NEXT: s_mov_b32 s13, s52 -; CHECK-NEXT: s_mov_b32 s14, s51 -; CHECK-NEXT: s_mov_b32 s15, s50 -; CHECK-NEXT: v_mov_b32_e32 v31, v40 -; CHECK-NEXT: s_waitcnt lgkmcnt(0) -; CHECK-NEXT: s_swappc_b64 s[30:31], s[16:17] -; CHECK-NEXT: buffer_load_dword v41, off, s[0:3], s33 ; 4-byte Folded Reload -; CHECK-NEXT: buffer_load_dword v40, off, s[0:3], s33 offset:4 ; 4-byte Folded Reload -; CHECK-NEXT: v_readlane_b32 s53, v42, 13 -; CHECK-NEXT: v_readlane_b32 s52, v42, 12 -; CHECK-NEXT: v_readlane_b32 s51, v42, 11 -; CHECK-NEXT: v_readlane_b32 s50, v42, 10 -; CHECK-NEXT: v_readlane_b32 s49, v42, 9 -; CHECK-NEXT: v_readlane_b32 s48, v42, 8 -; CHECK-NEXT: v_readlane_b32 s39, v42, 7 -; CHECK-NEXT: v_readlane_b32 s38, v42, 6 -; CHECK-NEXT: v_readlane_b32 s37, v42, 5 -; CHECK-NEXT: v_readlane_b32 s36, v42, 4 -; CHECK-NEXT: v_readlane_b32 s35, v42, 3 -; CHECK-NEXT: v_readlane_b32 s34, v42, 2 -; CHECK-NEXT: v_readlane_b32 s31, v42, 1 -; CHECK-NEXT: v_readlane_b32 s30, v42, 0 -; CHECK-NEXT: s_mov_b32 s32, s33 -; CHECK-NEXT: v_readlane_b32 s4, v42, 14 -; CHECK-NEXT: s_or_saveexec_b64 s[6:7], -1 -; CHECK-NEXT: buffer_load_dword v42, off, s[0:3], s33 offset:8 ; 4-byte Folded Reload -; CHECK-NEXT: s_mov_b64 exec, s[6:7] -; CHECK-NEXT: s_mov_b32 s33, s4 -; CHECK-NEXT: s_waitcnt vmcnt(0) -; CHECK-NEXT: s_setpc_b64 s[30:31] - %y = shl i32 %y.arg, 1 - %call = tail call fast double @_Z4powndi(double %x, i32 %y) - ret double %call -} - -define half @test_pown_fast_f16_known_odd(half %x, i32 %y.arg) { -; CHECK-LABEL: test_pown_fast_f16_known_odd: -; CHECK: ; %bb.0: -; CHECK-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; CHECK-NEXT: v_or_b32_e32 v1, 1, v1 -; CHECK-NEXT: v_cvt_f32_i32_e32 v1, v1 -; CHECK-NEXT: v_log_f16_e64 v2, |v0| -; CHECK-NEXT: s_movk_i32 s4, 0x7fff -; CHECK-NEXT: v_cvt_f16_f32_e32 v1, v1 -; CHECK-NEXT: v_mul_f16_e32 v1, v2, v1 -; CHECK-NEXT: v_exp_f16_e32 v1, v1 -; CHECK-NEXT: v_bfi_b32 v0, s4, v1, v0 -; CHECK-NEXT: s_setpc_b64 s[30:31] - %y = or i32 %y.arg, 1 - %call = tail call fast half @_Z4pownDhi(half %x, i32 %y) - ret half %call -} - -define float @test_pown_fast_f32_known_odd(float %x, i32 %y.arg) { -; CHECK-LABEL: test_pown_fast_f32_known_odd: -; CHECK: ; %bb.0: -; CHECK-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; CHECK-NEXT: s_mov_b32 s4, 0x800000 -; CHECK-NEXT: v_cmp_lt_f32_e64 vcc, |v0|, s4 -; CHECK-NEXT: v_cndmask_b32_e64 v3, 0, 32, vcc -; CHECK-NEXT: v_ldexp_f32 v3, |v0|, v3 -; CHECK-NEXT: v_or_b32_e32 v1, 1, v1 -; CHECK-NEXT: v_log_f32_e32 v3, v3 -; CHECK-NEXT: v_cvt_f32_i32_e32 v1, v1 -; CHECK-NEXT: v_mov_b32_e32 v2, 0x42000000 -; CHECK-NEXT: v_cndmask_b32_e32 v2, 0, v2, vcc -; CHECK-NEXT: v_sub_f32_e32 v2, v3, v2 -; CHECK-NEXT: v_mul_f32_e32 v3, v2, v1 -; CHECK-NEXT: s_mov_b32 s4, 0xc2fc0000 -; CHECK-NEXT: v_mov_b32_e32 v4, 0x42800000 -; CHECK-NEXT: v_cmp_gt_f32_e32 vcc, s4, v3 -; CHECK-NEXT: v_cndmask_b32_e32 v3, 0, v4, vcc -; CHECK-NEXT: v_fma_f32 v1, v2, v1, v3 -; CHECK-NEXT: v_exp_f32_e32 v1, v1 -; CHECK-NEXT: v_not_b32_e32 v2, 63 -; CHECK-NEXT: v_cndmask_b32_e32 v2, 0, v2, vcc -; CHECK-NEXT: s_brev_b32 s4, -2 -; CHECK-NEXT: v_ldexp_f32 v1, v1, v2 -; CHECK-NEXT: v_bfi_b32 v0, s4, v1, 
v0 -; CHECK-NEXT: s_setpc_b64 s[30:31] - %y = or i32 %y.arg, 1 - %call = tail call fast float @_Z4pownfi(float %x, i32 %y) - ret float %call -} - -define double @test_pown_fast_f64_known_odd(double %x, i32 %y.arg) { -; CHECK-LABEL: test_pown_fast_f64_known_odd: -; CHECK: ; %bb.0: -; CHECK-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; CHECK-NEXT: s_mov_b32 s16, s33 -; CHECK-NEXT: s_mov_b32 s33, s32 -; CHECK-NEXT: s_or_saveexec_b64 s[18:19], -1 -; CHECK-NEXT: buffer_store_dword v43, off, s[0:3], s33 offset:12 ; 4-byte Folded Spill -; CHECK-NEXT: s_mov_b64 exec, s[18:19] -; CHECK-NEXT: v_writelane_b32 v43, s16, 14 -; CHECK-NEXT: v_writelane_b32 v43, s30, 0 -; CHECK-NEXT: v_writelane_b32 v43, s31, 1 -; CHECK-NEXT: v_writelane_b32 v43, s34, 2 -; CHECK-NEXT: v_writelane_b32 v43, s35, 3 -; CHECK-NEXT: v_writelane_b32 v43, s36, 4 -; CHECK-NEXT: v_writelane_b32 v43, s37, 5 -; CHECK-NEXT: v_writelane_b32 v43, s38, 6 -; CHECK-NEXT: v_writelane_b32 v43, s39, 7 -; CHECK-NEXT: s_addk_i32 s32, 0x800 -; CHECK-NEXT: v_writelane_b32 v43, s48, 8 -; CHECK-NEXT: v_writelane_b32 v43, s49, 9 -; CHECK-NEXT: s_mov_b64 s[48:49], s[4:5] -; CHECK-NEXT: s_getpc_b64 s[4:5] -; CHECK-NEXT: s_add_u32 s4, s4, _Z4log2d@gotpcrel32@lo+4 -; CHECK-NEXT: s_addc_u32 s5, s5, _Z4log2d@gotpcrel32@hi+12 -; CHECK-NEXT: s_load_dwordx2 s[16:17], s[4:5], 0x0 -; CHECK-NEXT: v_writelane_b32 v43, s50, 10 -; CHECK-NEXT: buffer_store_dword v40, off, s[0:3], s33 offset:8 ; 4-byte Folded Spill -; CHECK-NEXT: buffer_store_dword v41, off, s[0:3], s33 offset:4 ; 4-byte Folded Spill -; CHECK-NEXT: buffer_store_dword v42, off, s[0:3], s33 ; 4-byte Folded Spill -; CHECK-NEXT: v_writelane_b32 v43, s51, 11 -; CHECK-NEXT: v_mov_b32_e32 v41, v1 -; CHECK-NEXT: v_writelane_b32 v43, s52, 12 -; CHECK-NEXT: v_and_b32_e32 v1, 0x7fffffff, v41 -; CHECK-NEXT: s_mov_b64 s[4:5], s[48:49] -; CHECK-NEXT: v_writelane_b32 v43, s53, 13 -; CHECK-NEXT: v_mov_b32_e32 v40, v31 -; CHECK-NEXT: s_mov_b32 s50, s15 -; CHECK-NEXT: s_mov_b32 s51, s14 -; CHECK-NEXT: s_mov_b32 s52, s13 -; CHECK-NEXT: s_mov_b32 s53, s12 -; CHECK-NEXT: s_mov_b64 s[34:35], s[10:11] -; CHECK-NEXT: s_mov_b64 s[36:37], s[8:9] -; CHECK-NEXT: s_mov_b64 s[38:39], s[6:7] -; CHECK-NEXT: v_or_b32_e32 v42, 1, v2 -; CHECK-NEXT: s_waitcnt lgkmcnt(0) -; CHECK-NEXT: s_swappc_b64 s[30:31], s[16:17] -; CHECK-NEXT: v_cvt_f64_i32_e32 v[2:3], v42 -; CHECK-NEXT: s_getpc_b64 s[4:5] -; CHECK-NEXT: s_add_u32 s4, s4, _Z4exp2d@gotpcrel32@lo+4 -; CHECK-NEXT: s_addc_u32 s5, s5, _Z4exp2d@gotpcrel32@hi+12 -; CHECK-NEXT: s_load_dwordx2 s[16:17], s[4:5], 0x0 -; CHECK-NEXT: s_mov_b64 s[4:5], s[48:49] -; CHECK-NEXT: v_mul_f64 v[0:1], v[0:1], v[2:3] -; CHECK-NEXT: s_mov_b64 s[6:7], s[38:39] -; CHECK-NEXT: s_mov_b64 s[8:9], s[36:37] -; CHECK-NEXT: s_mov_b64 s[10:11], s[34:35] -; CHECK-NEXT: s_mov_b32 s12, s53 -; CHECK-NEXT: s_mov_b32 s13, s52 -; CHECK-NEXT: s_mov_b32 s14, s51 -; CHECK-NEXT: s_mov_b32 s15, s50 -; CHECK-NEXT: v_mov_b32_e32 v31, v40 -; CHECK-NEXT: s_waitcnt lgkmcnt(0) -; CHECK-NEXT: s_swappc_b64 s[30:31], s[16:17] -; CHECK-NEXT: v_and_b32_e32 v2, 0x80000000, v41 -; CHECK-NEXT: buffer_load_dword v42, off, s[0:3], s33 ; 4-byte Folded Reload -; CHECK-NEXT: buffer_load_dword v41, off, s[0:3], s33 offset:4 ; 4-byte Folded Reload -; CHECK-NEXT: buffer_load_dword v40, off, s[0:3], s33 offset:8 ; 4-byte Folded Reload -; CHECK-NEXT: v_or_b32_e32 v1, v2, v1 -; CHECK-NEXT: v_readlane_b32 s53, v43, 13 -; CHECK-NEXT: v_readlane_b32 s52, v43, 12 -; CHECK-NEXT: v_readlane_b32 s51, v43, 11 -; CHECK-NEXT: v_readlane_b32 s50, v43, 10 -; 
CHECK-NEXT: v_readlane_b32 s49, v43, 9 -; CHECK-NEXT: v_readlane_b32 s48, v43, 8 -; CHECK-NEXT: v_readlane_b32 s39, v43, 7 -; CHECK-NEXT: v_readlane_b32 s38, v43, 6 -; CHECK-NEXT: v_readlane_b32 s37, v43, 5 -; CHECK-NEXT: v_readlane_b32 s36, v43, 4 -; CHECK-NEXT: v_readlane_b32 s35, v43, 3 -; CHECK-NEXT: v_readlane_b32 s34, v43, 2 -; CHECK-NEXT: v_readlane_b32 s31, v43, 1 -; CHECK-NEXT: v_readlane_b32 s30, v43, 0 -; CHECK-NEXT: s_mov_b32 s32, s33 -; CHECK-NEXT: v_readlane_b32 s4, v43, 14 -; CHECK-NEXT: s_or_saveexec_b64 s[6:7], -1 -; CHECK-NEXT: buffer_load_dword v43, off, s[0:3], s33 offset:12 ; 4-byte Folded Reload -; CHECK-NEXT: s_mov_b64 exec, s[6:7] -; CHECK-NEXT: s_mov_b32 s33, s4 -; CHECK-NEXT: s_waitcnt vmcnt(0) -; CHECK-NEXT: s_setpc_b64 s[30:31] - %y = or i32 %y.arg, 1 - %call = tail call fast double @_Z4powndi(double %x, i32 %y) - ret double %call -} - -!llvm.module.flags = !{!0} -!0 = !{i32 1, !"amdhsa_code_object_version", i32 500} +define double @test_pow_afn_f64_nnan_ninf_x_known_positive(double nofpclass(ninf nnorm nsub) %x, double %y) { +; CHECK-LABEL: define double @test_pow_afn_f64_nnan_ninf_x_known_positive +; CHECK-SAME: (double nofpclass(ninf nsub nnorm) [[X:%.*]], double [[Y:%.*]]) { +; CHECK-NEXT: [[__LOG2:%.*]] = call nnan ninf afn double @_Z4log2d(double [[X]]) +; CHECK-NEXT: [[__YLOGX:%.*]] = fmul nnan ninf afn double [[Y]], [[__LOG2]] +; CHECK-NEXT: [[__EXP2:%.*]] = call nnan ninf afn double @_Z4exp2d(double [[__YLOGX]]) +; CHECK-NEXT: ret double [[__EXP2]] +; + %pow = tail call afn nnan ninf double @_Z3powdd(double %x, double %y) + ret double %pow +} + +define <2 x double> @test_pow_afn_v2f64_nnan_x_known_positive(<2 x double> nofpclass(ninf nnorm nsub) %x, <2 x double> %y) { +; CHECK-LABEL: define <2 x double> @test_pow_afn_v2f64_nnan_x_known_positive +; CHECK-SAME: (<2 x double> nofpclass(ninf nsub nnorm) [[X:%.*]], <2 x double> [[Y:%.*]]) { +; CHECK-NEXT: [[POW:%.*]] = tail call nnan afn <2 x double> @_Z4powrDv2_dS_(<2 x double> [[X]], <2 x double> [[Y]]) +; CHECK-NEXT: ret <2 x double> [[POW]] +; + %pow = tail call afn nnan <2 x double> @_Z3powDv2_dS_(<2 x double> %x, <2 x double> %y) + ret <2 x double> %pow +} + +define <2 x double> @test_pow_afn_v2f64_nnan_ninf_x_known_positive(<2 x double> nofpclass(ninf nnorm nsub) %x, <2 x double> %y) { +; CHECK-LABEL: define <2 x double> @test_pow_afn_v2f64_nnan_ninf_x_known_positive +; CHECK-SAME: (<2 x double> nofpclass(ninf nsub nnorm) [[X:%.*]], <2 x double> [[Y:%.*]]) { +; CHECK-NEXT: [[__LOG2:%.*]] = call nnan ninf afn <2 x double> @_Z4log2Dv2_d(<2 x double> [[X]]) +; CHECK-NEXT: [[__YLOGX:%.*]] = fmul nnan ninf afn <2 x double> [[Y]], [[__LOG2]] +; CHECK-NEXT: [[__EXP2:%.*]] = call nnan ninf afn <2 x double> @_Z4exp2Dv2_d(<2 x double> [[__YLOGX]]) +; CHECK-NEXT: ret <2 x double> [[__EXP2]] +; + %pow = tail call afn nnan ninf <2 x double> @_Z3powDv2_dS_(<2 x double> %x, <2 x double> %y) + ret <2 x double> %pow +} + +define double @test_pow_f64_x_known_positive(double nofpclass(ninf nnorm nsub) %x, double %y) { +; CHECK-LABEL: define double @test_pow_f64_x_known_positive +; CHECK-SAME: (double nofpclass(ninf nsub nnorm) [[X:%.*]], double [[Y:%.*]]) { +; CHECK-NEXT: [[POW:%.*]] = tail call double @_Z4powrdd(double [[X]], double [[Y]]) +; CHECK-NEXT: ret double [[POW]] +; + %pow = tail call double @_Z3powdd(double %x, double %y) + ret double %pow +} + +define double @test_pow_afn_f64_x_known_positive(double nofpclass(ninf nnorm nsub) %x, double %y) { +; CHECK-LABEL: define double @test_pow_afn_f64_x_known_positive 
+; CHECK-SAME: (double nofpclass(ninf nsub nnorm) [[X:%.*]], double [[Y:%.*]]) { +; CHECK-NEXT: [[POW:%.*]] = tail call afn double @_Z4powrdd(double [[X]], double [[Y]]) +; CHECK-NEXT: ret double [[POW]] +; + %pow = tail call afn double @_Z3powdd(double %x, double %y) + ret double %pow +} + +define <2 x double> @test_pow_v2f64_x_known_positive(<2 x double> nofpclass(ninf nnorm nsub) %x, <2 x double> %y) { +; CHECK-LABEL: define <2 x double> @test_pow_v2f64_x_known_positive +; CHECK-SAME: (<2 x double> nofpclass(ninf nsub nnorm) [[X:%.*]], <2 x double> [[Y:%.*]]) { +; CHECK-NEXT: [[POW:%.*]] = tail call <2 x double> @_Z4powrDv2_dS_(<2 x double> [[X]], <2 x double> [[Y]]) +; CHECK-NEXT: ret <2 x double> [[POW]] +; + %pow = tail call <2 x double> @_Z3powDv2_dS_(<2 x double> %x, <2 x double> %y) + ret <2 x double> %pow +} + +define <2 x double> @test_pow_afn_v2f64_x_known_positive(<2 x double> nofpclass(ninf nnorm nsub) %x, <2 x double> %y) { +; CHECK-LABEL: define <2 x double> @test_pow_afn_v2f64_x_known_positive +; CHECK-SAME: (<2 x double> nofpclass(ninf nsub nnorm) [[X:%.*]], <2 x double> [[Y:%.*]]) { +; CHECK-NEXT: [[POW:%.*]] = tail call afn <2 x double> @_Z4powrDv2_dS_(<2 x double> [[X]], <2 x double> [[Y]]) +; CHECK-NEXT: ret <2 x double> [[POW]] +; + %pow = tail call afn <2 x double> @_Z3powDv2_dS_(<2 x double> %x, <2 x double> %y) + ret <2 x double> %pow +} + +define half @test_pow_afn_f16_nnan_x_known_positive(half nofpclass(ninf nnorm nsub) %x, half %y) { +; CHECK-LABEL: define half @test_pow_afn_f16_nnan_x_known_positive +; CHECK-SAME: (half nofpclass(ninf nsub nnorm) [[X:%.*]], half [[Y:%.*]]) { +; CHECK-NEXT: [[POW:%.*]] = tail call nnan afn half @_Z4powrDhDh(half [[X]], half [[Y]]) +; CHECK-NEXT: ret half [[POW]] +; + %pow = tail call afn nnan half @_Z3powDhDh(half %x, half %y) + ret half %pow +} + +define half @test_pow_afn_f16_nnan_ninf_x_known_positive(half nofpclass(ninf nnorm nsub) %x, half %y) { +; CHECK-LABEL: define half @test_pow_afn_f16_nnan_ninf_x_known_positive +; CHECK-SAME: (half nofpclass(ninf nsub nnorm) [[X:%.*]], half [[Y:%.*]]) { +; CHECK-NEXT: [[__LOG2:%.*]] = call nnan ninf afn half @llvm.log2.f16(half [[X]]) +; CHECK-NEXT: [[__YLOGX:%.*]] = fmul nnan ninf afn half [[Y]], [[__LOG2]] +; CHECK-NEXT: [[__EXP2:%.*]] = call nnan ninf afn half @llvm.exp2.f16(half [[__YLOGX]]) +; CHECK-NEXT: ret half [[__EXP2]] +; + %pow = tail call afn nnan ninf half @_Z3powDhDh(half %x, half %y) + ret half %pow +} + +define <2 x half> @test_pow_afn_v2f16_nnan_x_known_positive(<2 x half> nofpclass(ninf nnorm nsub) %x, <2 x half> %y) { +; CHECK-LABEL: define <2 x half> @test_pow_afn_v2f16_nnan_x_known_positive +; CHECK-SAME: (<2 x half> nofpclass(ninf nsub nnorm) [[X:%.*]], <2 x half> [[Y:%.*]]) { +; CHECK-NEXT: [[POW:%.*]] = tail call nnan afn <2 x half> @_Z4powrDv2_DhS_(<2 x half> [[X]], <2 x half> [[Y]]) +; CHECK-NEXT: ret <2 x half> [[POW]] +; + %pow = tail call afn nnan <2 x half> @_Z3powDv2_DhS_(<2 x half> %x, <2 x half> %y) + ret <2 x half> %pow +} + +define <2 x half> @test_pow_afn_v2f16_nnan_ninf_x_known_positive(<2 x half> nofpclass(ninf nnorm nsub) %x, <2 x half> %y) { +; CHECK-LABEL: define <2 x half> @test_pow_afn_v2f16_nnan_ninf_x_known_positive +; CHECK-SAME: (<2 x half> nofpclass(ninf nsub nnorm) [[X:%.*]], <2 x half> [[Y:%.*]]) { +; CHECK-NEXT: [[__LOG2:%.*]] = call nnan ninf afn <2 x half> @llvm.log2.v2f16(<2 x half> [[X]]) +; CHECK-NEXT: [[__YLOGX:%.*]] = fmul nnan ninf afn <2 x half> [[Y]], [[__LOG2]] +; CHECK-NEXT: [[__EXP2:%.*]] = call nnan ninf afn <2 x 
half> @llvm.exp2.v2f16(<2 x half> [[__YLOGX]]) +; CHECK-NEXT: ret <2 x half> [[__EXP2]] +; + %pow = tail call afn nnan ninf <2 x half> @_Z3powDv2_DhS_(<2 x half> %x, <2 x half> %y) + ret <2 x half> %pow +} + +define half @test_pow_f16_x_known_positive(half nofpclass(ninf nnorm nsub) %x, half %y) { +; CHECK-LABEL: define half @test_pow_f16_x_known_positive +; CHECK-SAME: (half nofpclass(ninf nsub nnorm) [[X:%.*]], half [[Y:%.*]]) { +; CHECK-NEXT: [[POW:%.*]] = tail call half @_Z4powrDhDh(half [[X]], half [[Y]]) +; CHECK-NEXT: ret half [[POW]] +; + %pow = tail call half @_Z3powDhDh(half %x, half %y) + ret half %pow +} + +define half @test_pow_afn_f16_x_known_positive(half nofpclass(ninf nnorm nsub) %x, half %y) { +; CHECK-LABEL: define half @test_pow_afn_f16_x_known_positive +; CHECK-SAME: (half nofpclass(ninf nsub nnorm) [[X:%.*]], half [[Y:%.*]]) { +; CHECK-NEXT: [[POW:%.*]] = tail call afn half @_Z4powrDhDh(half [[X]], half [[Y]]) +; CHECK-NEXT: ret half [[POW]] +; + %pow = tail call afn half @_Z3powDhDh(half %x, half %y) + ret half %pow +} + +define <2 x half> @test_pow_v2f16_x_known_positive(<2 x half> nofpclass(ninf nnorm nsub) %x, <2 x half> %y) { +; CHECK-LABEL: define <2 x half> @test_pow_v2f16_x_known_positive +; CHECK-SAME: (<2 x half> nofpclass(ninf nsub nnorm) [[X:%.*]], <2 x half> [[Y:%.*]]) { +; CHECK-NEXT: [[POW:%.*]] = tail call <2 x half> @_Z4powrDv2_DhS_(<2 x half> [[X]], <2 x half> [[Y]]) +; CHECK-NEXT: ret <2 x half> [[POW]] +; + %pow = tail call <2 x half> @_Z3powDv2_DhS_(<2 x half> %x, <2 x half> %y) + ret <2 x half> %pow +} + +define <2 x half> @test_pow_afn_v2f16_x_known_positive(<2 x half> nofpclass(ninf nnorm nsub) %x, <2 x half> %y) { +; CHECK-LABEL: define <2 x half> @test_pow_afn_v2f16_x_known_positive +; CHECK-SAME: (<2 x half> nofpclass(ninf nsub nnorm) [[X:%.*]], <2 x half> [[Y:%.*]]) { +; CHECK-NEXT: [[POW:%.*]] = tail call afn <2 x half> @_Z4powrDv2_DhS_(<2 x half> [[X]], <2 x half> [[Y]]) +; CHECK-NEXT: ret <2 x half> [[POW]] +; + %pow = tail call afn <2 x half> @_Z3powDv2_DhS_(<2 x half> %x, <2 x half> %y) + ret <2 x half> %pow +} + +define float @test_pow_f32__y_0(float %x) { +; CHECK-LABEL: define float @test_pow_f32__y_0 +; CHECK-SAME: (float [[X:%.*]]) { +; CHECK-NEXT: ret float 1.000000e+00 +; + %pow = tail call float @_Z3powff(float %x, float 0.0) + ret float %pow +} + +define float @test_pow_f32__y_n0(float %x) { +; CHECK-LABEL: define float @test_pow_f32__y_n0 +; CHECK-SAME: (float [[X:%.*]]) { +; CHECK-NEXT: ret float 1.000000e+00 +; + %pow = tail call float @_Z3powff(float %x, float -0.0) + ret float %pow +} + +define float @test_pow_f32__y_1(float %x) { +; CHECK-LABEL: define float @test_pow_f32__y_1 +; CHECK-SAME: (float [[X:%.*]]) { +; CHECK-NEXT: ret float [[X]] +; + %pow = tail call float @_Z3powff(float %x, float 1.0) + ret float %pow +} + +define float @test_pow_f32__y_n1(float %x) { +; CHECK-LABEL: define float @test_pow_f32__y_n1 +; CHECK-SAME: (float [[X:%.*]]) { +; CHECK-NEXT: [[__POWRECIP:%.*]] = fdiv float 1.000000e+00, [[X]] +; CHECK-NEXT: ret float [[__POWRECIP]] +; + %pow = tail call float @_Z3powff(float %x, float -1.0) + ret float %pow +} + +define float @test_pow_f32__y_2(float %x) { +; CHECK-LABEL: define float @test_pow_f32__y_2 +; CHECK-SAME: (float [[X:%.*]]) { +; CHECK-NEXT: [[__POW2:%.*]] = fmul float [[X]], [[X]] +; CHECK-NEXT: ret float [[__POW2]] +; + %pow = tail call float @_Z3powff(float %x, float 2.0) + ret float %pow +} + +define float @test_pow_f32__y_n2(float %x) { +; CHECK-LABEL: define float 
@test_pow_f32__y_n2
+; CHECK-SAME: (float [[X:%.*]]) {
+; CHECK-NEXT: [[POW:%.*]] = tail call float @_Z4pownfi(float [[X]], i32 -2)
+; CHECK-NEXT: ret float [[POW]]
+;
+ %pow = tail call float @_Z3powff(float %x, float -2.0)
+ ret float %pow
+}
+
+define float @test_pow_f32__y_half(float %x) {
+; CHECK-LABEL: define float @test_pow_f32__y_half
+; CHECK-SAME: (float [[X:%.*]]) {
+; CHECK-NEXT: [[__POW2SQRT:%.*]] = call float @_Z4sqrtf(float [[X]])
+; CHECK-NEXT: ret float [[__POW2SQRT]]
+;
+ %pow = tail call float @_Z3powff(float %x, float 0.5)
+ ret float %pow
+}
+
+define float @test_pow_f32__y_neg_half(float %x) {
+; CHECK-LABEL: define float @test_pow_f32__y_neg_half
+; CHECK-SAME: (float [[X:%.*]]) {
+; CHECK-NEXT: [[__POW2RSQRT:%.*]] = call float @_Z5rsqrtf(float [[X]])
+; CHECK-NEXT: ret float [[__POW2RSQRT]]
+;
+ %pow = tail call float @_Z3powff(float %x, float -0.5)
+ ret float %pow
+}
+
+define float @test_pow_f32__y_3(float %x) {
+; CHECK-LABEL: define float @test_pow_f32__y_3
+; CHECK-SAME: (float [[X:%.*]]) {
+; CHECK-NEXT: [[POW:%.*]] = tail call float @_Z4pownfi(float [[X]], i32 3)
+; CHECK-NEXT: ret float [[POW]]
+;
+ %pow = tail call float @_Z3powff(float %x, float 3.0)
+ ret float %pow
+}
+
+define float @test_pow_f32__y_n3(float %x) {
+; CHECK-LABEL: define float @test_pow_f32__y_n3
+; CHECK-SAME: (float [[X:%.*]]) {
+; CHECK-NEXT: [[POW:%.*]] = tail call float @_Z4pownfi(float [[X]], i32 -3)
+; CHECK-NEXT: ret float [[POW]]
+;
+ %pow = tail call float @_Z3powff(float %x, float -3.0)
+ ret float %pow
+}
+
+define float @test_pow_f32__y_2_5(float %x) {
+; CHECK-LABEL: define float @test_pow_f32__y_2_5
+; CHECK-SAME: (float [[X:%.*]]) {
+; CHECK-NEXT: [[POW:%.*]] = tail call float @_Z3powff(float [[X]], float 2.500000e+00)
+; CHECK-NEXT: ret float [[POW]]
+;
+ %pow = tail call float @_Z3powff(float %x, float 2.5)
+ ret float %pow
+}
+
+define float @test_pow_f32__y_n_2_5(float %x) {
+; CHECK-LABEL: define float @test_pow_f32__y_n_2_5
+; CHECK-SAME: (float [[X:%.*]]) {
+; CHECK-NEXT: [[POW:%.*]] = tail call float @_Z3powff(float [[X]], float -2.500000e+00)
+; CHECK-NEXT: ret float [[POW]]
+;
+ %pow = tail call float @_Z3powff(float %x, float -2.5)
+ ret float %pow
+}
+
+define <2 x float> @test_pow_v2f32__y_0(<2 x float> %x) {
+; CHECK-LABEL: define <2 x float> @test_pow_v2f32__y_0
+; CHECK-SAME: (<2 x float> [[X:%.*]]) {
+; CHECK-NEXT: ret <2 x float> splat (float 1.000000e+00)
+;
+ %pow = tail call <2 x float> @_Z3powDv2_fS_(<2 x float> %x, <2 x float> zeroinitializer)
+ ret <2 x float> %pow
+}
+
+define <2 x float> @test_pow_v2f32__y_n0(<2 x float> %x) {
+; CHECK-LABEL: define <2 x float> @test_pow_v2f32__y_n0
+; CHECK-SAME: (<2 x float> [[X:%.*]]) {
+; CHECK-NEXT: ret <2 x float> splat (float 1.000000e+00)
+;
+ %pow = tail call <2 x float> @_Z3powDv2_fS_(<2 x float> %x, <2 x float> <float -0.0, float -0.0>)
+ ret <2 x float> %pow
+}
+
+define <2 x float> @test_pow_v2f32__y_1(<2 x float> %x) {
+; CHECK-LABEL: define <2 x float> @test_pow_v2f32__y_1
+; CHECK-SAME: (<2 x float> [[X:%.*]]) {
+; CHECK-NEXT: ret <2 x float> [[X]]
+;
+ %pow = tail call <2 x float> @_Z3powDv2_fS_(<2 x float> %x, <2 x float> <float 1.0, float 1.0>)
+ ret <2 x float> %pow
+}
+
+define <2 x float> @test_pow_v2f32__y_n1(<2 x float> %x) {
+; CHECK-LABEL: define <2 x float> @test_pow_v2f32__y_n1
+; CHECK-SAME: (<2 x float> [[X:%.*]]) {
+; CHECK-NEXT: [[__POWRECIP:%.*]] = fdiv <2 x float> splat (float 1.000000e+00), [[X]]
+; CHECK-NEXT: ret <2 x float> [[__POWRECIP]]
+;
+ %pow = tail call <2 x float> @_Z3powDv2_fS_(<2 x float> %x, <2 x float> <float -1.0, float -1.0>)
+ ret <2 x float> %pow
+}
+
+define <2 x float> @test_pow_v2f32__y_2(<2 x float> %x) {
+; CHECK-LABEL: define <2 x float> @test_pow_v2f32__y_2
+; CHECK-SAME: (<2 x float> [[X:%.*]]) {
+; CHECK-NEXT: [[__POW2:%.*]] = fmul <2 x float> [[X]], [[X]]
+; CHECK-NEXT: ret <2 x float> [[__POW2]]
+;
+ %pow = tail call <2 x float> @_Z3powDv2_fS_(<2 x float> %x, <2 x float> <float 2.0, float 2.0>)
+ ret <2 x float> %pow
+}
+
+define <2 x float> @test_pow_v2f32__y_n2(<2 x float> %x) {
+; CHECK-LABEL: define <2 x float> @test_pow_v2f32__y_n2
+; CHECK-SAME: (<2 x float> [[X:%.*]]) {
+; CHECK-NEXT: [[POW:%.*]] = tail call <2 x float> @_Z4pownDv2_fDv2_i(<2 x float> [[X]], <2 x i32> splat (i32 -2))
+; CHECK-NEXT: ret <2 x float> [[POW]]
+;
+ %pow = tail call <2 x float> @_Z3powDv2_fS_(<2 x float> %x, <2 x float> <float -2.0, float -2.0>)
+ ret <2 x float> %pow
+}
+
+define <2 x float> @test_pow_v2f32__y_half(<2 x float> %x) {
+; CHECK-LABEL: define <2 x float> @test_pow_v2f32__y_half
+; CHECK-SAME: (<2 x float> [[X:%.*]]) {
+; CHECK-NEXT: [[__POW2SQRT:%.*]] = call <2 x float> @_Z4sqrtDv2_f(<2 x float> [[X]])
+; CHECK-NEXT: ret <2 x float> [[__POW2SQRT]]
+;
+ %pow = tail call <2 x float> @_Z3powDv2_fS_(<2 x float> %x, <2 x float> <float 0.5, float 0.5>)
+ ret <2 x float> %pow
+}
+
+define <2 x float> @test_pow_v2f32__y_neg_half(<2 x float> %x) {
+; CHECK-LABEL: define <2 x float> @test_pow_v2f32__y_neg_half
+; CHECK-SAME: (<2 x float> [[X:%.*]]) {
+; CHECK-NEXT: [[__POW2RSQRT:%.*]] = call <2 x float> @_Z5rsqrtDv2_f(<2 x float> [[X]])
+; CHECK-NEXT: ret <2 x float> [[__POW2RSQRT]]
+;
+ %pow = tail call <2 x float> @_Z3powDv2_fS_(<2 x float> %x, <2 x float> <float -0.5, float -0.5>)
+ ret <2 x float> %pow
+}
+
+define <2 x float> @test_pow_v2f32__y_3(<2 x float> %x) {
+; CHECK-LABEL: define <2 x float> @test_pow_v2f32__y_3
+; CHECK-SAME: (<2 x float> [[X:%.*]]) {
+; CHECK-NEXT: [[POW:%.*]] = tail call <2 x float> @_Z4pownDv2_fDv2_i(<2 x float> [[X]], <2 x i32> splat (i32 3))
+; CHECK-NEXT: ret <2 x float> [[POW]]
+;
+ %pow = tail call <2 x float> @_Z3powDv2_fS_(<2 x float> %x, <2 x float> <float 3.0, float 3.0>)
+ ret <2 x float> %pow
+}
+
+define <2 x float> @test_pow_v2f32__y_n3(<2 x float> %x) {
+; CHECK-LABEL: define <2 x float> @test_pow_v2f32__y_n3
+; CHECK-SAME: (<2 x float> [[X:%.*]]) {
+; CHECK-NEXT: [[POW:%.*]] = tail call <2 x float> @_Z4pownDv2_fDv2_i(<2 x float> [[X]], <2 x i32> splat (i32 -3))
+; CHECK-NEXT: ret <2 x float> [[POW]]
+;
+ %pow = tail call <2 x float> @_Z3powDv2_fS_(<2 x float> %x, <2 x float> <float -3.0, float -3.0>)
+ ret <2 x float> %pow
+}
+
+define <2 x float> @test_pow_v2f32__y_2_5(<2 x float> %x) {
+; CHECK-LABEL: define <2 x float> @test_pow_v2f32__y_2_5
+; CHECK-SAME: (<2 x float> [[X:%.*]]) {
+; CHECK-NEXT: [[POW:%.*]] = tail call <2 x float> @_Z3powDv2_fS_(<2 x float> [[X]], <2 x float> splat (float 2.500000e+00))
+; CHECK-NEXT: ret <2 x float> [[POW]]
+;
+ %pow = tail call <2 x float> @_Z3powDv2_fS_(<2 x float> %x, <2 x float> <float 2.5, float 2.5>)
+ ret <2 x float> %pow
+}
+
+define <2 x float> @test_pow_v2f32__y_n_2_5(<2 x float> %x) {
+; CHECK-LABEL: define <2 x float> @test_pow_v2f32__y_n_2_5
+; CHECK-SAME: (<2 x float> [[X:%.*]]) {
+; CHECK-NEXT: [[POW:%.*]] = tail call <2 x float> @_Z3powDv2_fS_(<2 x float> [[X]], <2 x float> splat (float -2.500000e+00))
+; CHECK-NEXT: ret <2 x float> [[POW]]
+;
+ %pow = tail call <2 x float> @_Z3powDv2_fS_(<2 x float> %x, <2 x float> <float -2.5, float -2.5>)
+ ret <2 x float> %pow
+}
+
+define float @test_pow_f32__known_positive__y_0(float nofpclass(ninf nnorm nsub) %x) {
+; CHECK-LABEL: define float @test_pow_f32__known_positive__y_0
+; CHECK-SAME: (float nofpclass(ninf nsub nnorm) [[X:%.*]]) {
+;
CHECK-NEXT: ret float 1.000000e+00 +; + %pow = tail call float @_Z3powff(float %x, float 0.0) + ret float %pow +} + +define float @test_pow_f32__known_positive__y_1(float nofpclass(ninf nnorm nsub) %x) { +; CHECK-LABEL: define float @test_pow_f32__known_positive__y_1 +; CHECK-SAME: (float nofpclass(ninf nsub nnorm) [[X:%.*]]) { +; CHECK-NEXT: ret float [[X]] +; + %pow = tail call float @_Z3powff(float %x, float 1.0) + ret float %pow +} + +define float @test_pow_f32__known_positive__y_neg1(float nofpclass(ninf nnorm nsub) %x) { +; CHECK-LABEL: define float @test_pow_f32__known_positive__y_neg1 +; CHECK-SAME: (float nofpclass(ninf nsub nnorm) [[X:%.*]]) { +; CHECK-NEXT: [[__POWRECIP:%.*]] = fdiv float 1.000000e+00, [[X]] +; CHECK-NEXT: ret float [[__POWRECIP]] +; + %pow = tail call float @_Z3powff(float %x, float -1.0) + ret float %pow +} + +define float @test_pow_f32__known_positive__y_2(float nofpclass(ninf nnorm nsub) %x) { +; CHECK-LABEL: define float @test_pow_f32__known_positive__y_2 +; CHECK-SAME: (float nofpclass(ninf nsub nnorm) [[X:%.*]]) { +; CHECK-NEXT: [[__POW2:%.*]] = fmul float [[X]], [[X]] +; CHECK-NEXT: ret float [[__POW2]] +; + %pow = tail call float @_Z3powff(float %x, float 2.0) + ret float %pow +} + +define float @test_pow_f32__known_positive__y_half(float nofpclass(ninf nnorm nsub) %x) { +; CHECK-LABEL: define float @test_pow_f32__known_positive__y_half +; CHECK-SAME: (float nofpclass(ninf nsub nnorm) [[X:%.*]]) { +; CHECK-NEXT: [[__POW2SQRT:%.*]] = call float @_Z4sqrtf(float [[X]]) +; CHECK-NEXT: ret float [[__POW2SQRT]] +; + %pow = tail call float @_Z3powff(float %x, float 0.5) + ret float %pow +} + +define float @test_pow_f32__known_positive__y_neghalf(float nofpclass(ninf nnorm nsub) %x) { +; CHECK-LABEL: define float @test_pow_f32__known_positive__y_neghalf +; CHECK-SAME: (float nofpclass(ninf nsub nnorm) [[X:%.*]]) { +; CHECK-NEXT: [[__POW2RSQRT:%.*]] = call float @_Z5rsqrtf(float [[X]]) +; CHECK-NEXT: ret float [[__POW2RSQRT]] +; + %pow = tail call float @_Z3powff(float %x, float -0.5) + ret float %pow +} + +define float @test_pow_f32_x_assumed_oge_0(float %x, float %y) { +; CHECK-LABEL: define float @test_pow_f32_x_assumed_oge_0 +; CHECK-SAME: (float [[X:%.*]], float [[Y:%.*]]) { +; CHECK-NEXT: [[X_OGE_ZERO:%.*]] = fcmp oge float [[X]], 0.000000e+00 +; CHECK-NEXT: call void @llvm.assume(i1 [[X_OGE_ZERO]]) +; CHECK-NEXT: [[POW:%.*]] = tail call float @_Z4powrff(float [[X]], float [[Y]]) +; CHECK-NEXT: ret float [[POW]] +; + %x.oge.zero = fcmp oge float %x, 0.0 + call void @llvm.assume(i1 %x.oge.zero) + %pow = tail call float @_Z3powff(float %x, float %y) + ret float %pow +} + +define float @test_pow_f32_x_assumed_ogt_0(float %x, float %y) { +; CHECK-LABEL: define float @test_pow_f32_x_assumed_ogt_0 +; CHECK-SAME: (float [[X:%.*]], float [[Y:%.*]]) { +; CHECK-NEXT: [[X_OGE_ZERO:%.*]] = fcmp ogt float [[X]], 0.000000e+00 +; CHECK-NEXT: call void @llvm.assume(i1 [[X_OGE_ZERO]]) +; CHECK-NEXT: [[POW:%.*]] = tail call float @_Z4powrff(float [[X]], float [[Y]]) +; CHECK-NEXT: ret float [[POW]] +; + %x.oge.zero = fcmp ogt float %x, 0.0 + call void @llvm.assume(i1 %x.oge.zero) + %pow = tail call float @_Z3powff(float %x, float %y) + ret float %pow +} + +define float @test_pow_f32_x_assumed_uge_0(float %x, float %y) { +; CHECK-LABEL: define float @test_pow_f32_x_assumed_uge_0 +; CHECK-SAME: (float [[X:%.*]], float [[Y:%.*]]) { +; CHECK-NEXT: [[X_UGE_ZERO:%.*]] = fcmp uge float [[X]], 0.000000e+00 +; CHECK-NEXT: call void @llvm.assume(i1 [[X_UGE_ZERO]]) +; CHECK-NEXT: 
[[POW:%.*]] = tail call float @_Z4powrff(float [[X]], float [[Y]]) +; CHECK-NEXT: ret float [[POW]] +; + %x.uge.zero = fcmp uge float %x, 0.0 + call void @llvm.assume(i1 %x.uge.zero) + %pow = tail call float @_Z3powff(float %x, float %y) + ret float %pow +} + +define float @test_pow_f32_x_assumed_ugt_0(float %x, float %y) { +; CHECK-LABEL: define float @test_pow_f32_x_assumed_ugt_0 +; CHECK-SAME: (float [[X:%.*]], float [[Y:%.*]]) { +; CHECK-NEXT: [[X_UGT_ZERO:%.*]] = fcmp ugt float [[X]], 0.000000e+00 +; CHECK-NEXT: call void @llvm.assume(i1 [[X_UGT_ZERO]]) +; CHECK-NEXT: [[POW:%.*]] = tail call float @_Z4powrff(float [[X]], float [[Y]]) +; CHECK-NEXT: ret float [[POW]] +; + %x.ugt.zero = fcmp ugt float %x, 0.0 + call void @llvm.assume(i1 %x.ugt.zero) + %pow = tail call float @_Z3powff(float %x, float %y) + ret float %pow +} + +define float @test_pow_afn_f32__y_poison(float %x) { +; CHECK-LABEL: define float @test_pow_afn_f32__y_poison +; CHECK-SAME: (float [[X:%.*]]) { +; CHECK-NEXT: [[POW:%.*]] = tail call afn float @_Z4pownfi(float [[X]], i32 poison) +; CHECK-NEXT: ret float [[POW]] +; + %pow = tail call afn float @_Z3powff(float %x, float poison) + ret float %pow +} + +define float @test_pow_afn_f32__y_3(float %x) { +; CHECK-LABEL: define float @test_pow_afn_f32__y_3 +; CHECK-SAME: (float [[X:%.*]]) { +; CHECK-NEXT: [[POW:%.*]] = tail call afn float @_Z4pownfi(float [[X]], i32 3) +; CHECK-NEXT: ret float [[POW]] +; + %pow = tail call afn float @_Z3powff(float %x, float 3.0) + ret float %pow +} + +define float @test_pow_afn_f32_nnan_ninf__y_3(float %x) { +; CHECK-LABEL: define float @test_pow_afn_f32_nnan_ninf__y_3 +; CHECK-SAME: (float [[X:%.*]]) { +; CHECK-NEXT: [[__POWX2:%.*]] = fmul nnan ninf afn float [[X]], [[X]] +; CHECK-NEXT: [[__POWPROD:%.*]] = fmul nnan ninf afn float [[X]], [[__POWX2]] +; CHECK-NEXT: ret float [[__POWPROD]] +; + %pow = tail call afn nnan ninf float @_Z3powff(float %x, float 3.0) + ret float %pow +} + +define float @test_pow_afn_f32__y_4(float %x) { +; CHECK-LABEL: define float @test_pow_afn_f32__y_4 +; CHECK-SAME: (float [[X:%.*]]) { +; CHECK-NEXT: [[POW:%.*]] = tail call afn float @_Z4pownfi(float [[X]], i32 4) +; CHECK-NEXT: ret float [[POW]] +; + %pow = tail call afn float @_Z3powff(float %x, float 4.0) + ret float %pow +} + +define float @test_pow_afn_f32_nnan_ninf__y_4(float %x) { +; CHECK-LABEL: define float @test_pow_afn_f32_nnan_ninf__y_4 +; CHECK-SAME: (float [[X:%.*]]) { +; CHECK-NEXT: [[__POWX2:%.*]] = fmul nnan ninf afn float [[X]], [[X]] +; CHECK-NEXT: [[__POWX21:%.*]] = fmul nnan ninf afn float [[__POWX2]], [[__POWX2]] +; CHECK-NEXT: ret float [[__POWX21]] +; + %pow = tail call afn nnan ninf float @_Z3powff(float %x, float 4.0) + ret float %pow +} + +define float @test_pow_afn_f32_nnan_ninf__y_4_5(float %x) { +; CHECK-LABEL: define float @test_pow_afn_f32_nnan_ninf__y_4_5 +; CHECK-SAME: (float [[X:%.*]]) { +; CHECK-NEXT: [[POW:%.*]] = tail call nnan ninf afn float @_Z3powff(float [[X]], float 4.500000e+00) +; CHECK-NEXT: ret float [[POW]] +; + %pow = tail call afn nnan ninf float @_Z3powff(float %x, float 4.5) + ret float %pow +} + +define float @test_pow_afn_f32__y_5(float %x) { +; CHECK-LABEL: define float @test_pow_afn_f32__y_5 +; CHECK-SAME: (float [[X:%.*]]) { +; CHECK-NEXT: [[POW:%.*]] = tail call afn float @_Z4pownfi(float [[X]], i32 5) +; CHECK-NEXT: ret float [[POW]] +; + %pow = tail call afn float @_Z3powff(float %x, float 5.0) + ret float %pow +} + +define float @test_pow_afn_f32_nnan_ninf__y_5(float %x) { +; CHECK-LABEL: define 
float @test_pow_afn_f32_nnan_ninf__y_5 +; CHECK-SAME: (float [[X:%.*]]) { +; CHECK-NEXT: [[__POWX2:%.*]] = fmul nnan ninf afn float [[X]], [[X]] +; CHECK-NEXT: [[__POWX21:%.*]] = fmul nnan ninf afn float [[__POWX2]], [[__POWX2]] +; CHECK-NEXT: [[__POWPROD:%.*]] = fmul nnan ninf afn float [[X]], [[__POWX21]] +; CHECK-NEXT: ret float [[__POWPROD]] +; + %pow = tail call afn nnan ninf float @_Z3powff(float %x, float 5.0) + ret float %pow +} + +define float @test_pow_afn_f32__y_neg5(float %x) { +; CHECK-LABEL: define float @test_pow_afn_f32__y_neg5 +; CHECK-SAME: (float [[X:%.*]]) { +; CHECK-NEXT: [[POW:%.*]] = tail call afn float @_Z4pownfi(float [[X]], i32 -5) +; CHECK-NEXT: ret float [[POW]] +; + %pow = tail call afn float @_Z3powff(float %x, float -5.0) + ret float %pow +} + +define float @test_pow_afn_f32_nnan_ninf__y_neg5(float %x) { +; CHECK-LABEL: define float @test_pow_afn_f32_nnan_ninf__y_neg5 +; CHECK-SAME: (float [[X:%.*]]) { +; CHECK-NEXT: [[__POWX2:%.*]] = fmul nnan ninf afn float [[X]], [[X]] +; CHECK-NEXT: [[__POWX21:%.*]] = fmul nnan ninf afn float [[__POWX2]], [[__POWX2]] +; CHECK-NEXT: [[__POWPROD:%.*]] = fmul nnan ninf afn float [[X]], [[__POWX21]] +; CHECK-NEXT: [[__1POWPROD:%.*]] = fdiv nnan ninf afn float 1.000000e+00, [[__POWPROD]] +; CHECK-NEXT: ret float [[__1POWPROD]] +; + %pow = tail call afn nnan ninf float @_Z3powff(float %x, float -5.0) + ret float %pow +} + +define float @test_pow_afn_f32_nnan_ninf__y_10(float %x) { +; CHECK-LABEL: define float @test_pow_afn_f32_nnan_ninf__y_10 +; CHECK-SAME: (float [[X:%.*]]) { +; CHECK-NEXT: [[__POWX2:%.*]] = fmul nnan ninf afn float [[X]], [[X]] +; CHECK-NEXT: [[__POWX21:%.*]] = fmul nnan ninf afn float [[__POWX2]], [[__POWX2]] +; CHECK-NEXT: [[__POWX22:%.*]] = fmul nnan ninf afn float [[__POWX21]], [[__POWX21]] +; CHECK-NEXT: [[__POWPROD:%.*]] = fmul nnan ninf afn float [[__POWX2]], [[__POWX22]] +; CHECK-NEXT: ret float [[__POWPROD]] +; + %pow = tail call afn nnan ninf float @_Z3powff(float %x, float 10.0) + ret float %pow +} + +define <2 x float> @test_pow_afn_v2f32_nnan_ninf__y_poison(<2 x float> %x) { +; CHECK-LABEL: define <2 x float> @test_pow_afn_v2f32_nnan_ninf__y_poison +; CHECK-SAME: (<2 x float> [[X:%.*]]) { +; CHECK-NEXT: ret <2 x float> poison +; + %pow = tail call afn nnan ninf <2 x float> @_Z3powDv2_fS_(<2 x float> %x, <2 x float> poison) + ret <2 x float> %pow +} + +define <2 x float> @test_pow_afn_v2f32_nnan_ninf__y_3(<2 x float> %x) { +; CHECK-LABEL: define <2 x float> @test_pow_afn_v2f32_nnan_ninf__y_3 +; CHECK-SAME: (<2 x float> [[X:%.*]]) { +; CHECK-NEXT: [[__POWX2:%.*]] = fmul nnan ninf afn <2 x float> [[X]], [[X]] +; CHECK-NEXT: [[__POWPROD:%.*]] = fmul nnan ninf afn <2 x float> [[X]], [[__POWX2]] +; CHECK-NEXT: ret <2 x float> [[__POWPROD]] +; + %pow = tail call afn nnan ninf <2 x float> @_Z3powDv2_fS_(<2 x float> %x, <2 x float> <float 3.0, float 3.0>) + ret <2 x float> %pow +} + +define <2 x float> @test_pow_afn_v2f32_nnan_ninf__y_4(<2 x float> %x) { +; CHECK-LABEL: define <2 x float> @test_pow_afn_v2f32_nnan_ninf__y_4 +; CHECK-SAME: (<2 x float> [[X:%.*]]) { +; CHECK-NEXT: [[__POWX2:%.*]] = fmul nnan ninf afn <2 x float> [[X]], [[X]] +; CHECK-NEXT: [[__POWX21:%.*]] = fmul nnan ninf afn <2 x float> [[__POWX2]], [[__POWX2]] +; CHECK-NEXT: ret <2 x float> [[__POWX21]] +; + %pow = tail call afn nnan ninf <2 x float> @_Z3powDv2_fS_(<2 x float> %x, <2 x float> <float 4.0, float 4.0>) + ret <2 x float> %pow +} + +define <2 x float> @test_pow_afn_v2f32_nnan_ninf__y_4_5(<2 x float> %x) { +; CHECK-LABEL: define <2 x float> 
@test_pow_afn_v2f32_nnan_ninf__y_4_5 +; CHECK-SAME: (<2 x float> [[X:%.*]]) { +; CHECK-NEXT: [[POW:%.*]] = tail call nnan ninf afn <2 x float> @_Z3powDv2_fS_(<2 x float> [[X]], <2 x float> splat (float 4.500000e+00)) +; CHECK-NEXT: ret <2 x float> [[POW]] +; + %pow = tail call afn nnan ninf <2 x float> @_Z3powDv2_fS_(<2 x float> %x, <2 x float> <float 4.5, float 4.5>) + ret <2 x float> %pow +} + +define <2 x float> @test_pow_afn_v2f32_nnan_ninf__y_4_5_undef(<2 x float> %x) { +; CHECK-LABEL: define <2 x float> @test_pow_afn_v2f32_nnan_ninf__y_4_5_undef +; CHECK-SAME: (<2 x float> [[X:%.*]]) { +; CHECK-NEXT: [[POW:%.*]] = tail call nnan ninf afn <2 x float> @_Z3powDv2_fS_(<2 x float> [[X]], <2 x float> <float 4.500000e+00, float undef>) +; CHECK-NEXT: ret <2 x float> [[POW]] +; + %pow = tail call afn nnan ninf <2 x float> @_Z3powDv2_fS_(<2 x float> %x, <2 x float> <float 4.5, float undef>) + ret <2 x float> %pow +} + +define <2 x float> @test_pow_afn_v2f32_nnan_ninf__y_5(<2 x float> %x) { +; CHECK-LABEL: define <2 x float> @test_pow_afn_v2f32_nnan_ninf__y_5 +; CHECK-SAME: (<2 x float> [[X:%.*]]) { +; CHECK-NEXT: [[__POWX2:%.*]] = fmul nnan ninf afn <2 x float> [[X]], [[X]] +; CHECK-NEXT: [[__POWX21:%.*]] = fmul nnan ninf afn <2 x float> [[__POWX2]], [[__POWX2]] +; CHECK-NEXT: [[__POWPROD:%.*]] = fmul nnan ninf afn <2 x float> [[X]], [[__POWX21]] +; CHECK-NEXT: ret <2 x float> [[__POWPROD]] +; + %pow = tail call afn nnan ninf <2 x float> @_Z3powDv2_fS_(<2 x float> %x, <2 x float> <float 5.0, float 5.0>) + ret <2 x float> %pow +} + +define float @test_pow_afn_f32_nnan_ninf__y_5_known_positive(float nofpclass(ninf nsub nnorm) %x) { +; CHECK-LABEL: define float @test_pow_afn_f32_nnan_ninf__y_5_known_positive +; CHECK-SAME: (float nofpclass(ninf nsub nnorm) [[X:%.*]]) { +; CHECK-NEXT: [[__POWX2:%.*]] = fmul nnan ninf afn float [[X]], [[X]] +; CHECK-NEXT: [[__POWX21:%.*]] = fmul nnan ninf afn float [[__POWX2]], [[__POWX2]] +; CHECK-NEXT: [[__POWPROD:%.*]] = fmul nnan ninf afn float [[X]], [[__POWX21]] +; CHECK-NEXT: ret float [[__POWPROD]] +; + %pow = tail call afn nnan ninf float @_Z3powff(float %x, float 5.0) + ret float %pow +} + +; we know we can ignore missing ninf on the input from the flag on the call +define float @test_pow_afn_f32_nnan_ninf__y_5_known_positive_with_ninf_flag(float nofpclass(nsub nnorm) %x) { +; CHECK-LABEL: define float @test_pow_afn_f32_nnan_ninf__y_5_known_positive_with_ninf_flag +; CHECK-SAME: (float nofpclass(nsub nnorm) [[X:%.*]]) { +; CHECK-NEXT: [[__POWX2:%.*]] = fmul nnan ninf afn float [[X]], [[X]] +; CHECK-NEXT: [[__POWX21:%.*]] = fmul nnan ninf afn float [[__POWX2]], [[__POWX2]] +; CHECK-NEXT: [[__POWPROD:%.*]] = fmul nnan ninf afn float [[X]], [[__POWX21]] +; CHECK-NEXT: ret float [[__POWPROD]] +; + %pow = tail call afn nnan ninf float @_Z3powff(float %x, float 5.0) + ret float %pow +} + +define double @test_pow_afn_f64__y_3(double %x) { +; CHECK-LABEL: define double @test_pow_afn_f64__y_3 +; CHECK-SAME: (double [[X:%.*]]) { +; CHECK-NEXT: [[POW:%.*]] = tail call afn double @_Z4powndi(double [[X]], i32 3) +; CHECK-NEXT: ret double [[POW]] +; + %pow = tail call afn double @_Z3powdd(double %x, double 3.0) + ret double %pow +} + +define double @test_pow_afn_f64_nnan_ninf__y_3(double %x) { +; CHECK-LABEL: define double @test_pow_afn_f64_nnan_ninf__y_3 +; CHECK-SAME: (double [[X:%.*]]) { +; CHECK-NEXT: [[__POWX2:%.*]] = fmul nnan ninf afn double [[X]], [[X]] +; CHECK-NEXT: [[__POWPROD:%.*]] = fmul nnan ninf afn double [[X]], [[__POWX2]] +; CHECK-NEXT: ret double [[__POWPROD]] +; + %pow = tail call afn nnan ninf double @_Z3powdd(double %x, double 3.0) + ret double 
%pow +} + +define double @test_pow_afn_f64__y_4(double %x) { +; CHECK-LABEL: define double @test_pow_afn_f64__y_4 +; CHECK-SAME: (double [[X:%.*]]) { +; CHECK-NEXT: [[POW:%.*]] = tail call afn double @_Z4powndi(double [[X]], i32 4) +; CHECK-NEXT: ret double [[POW]] +; + %pow = tail call afn double @_Z3powdd(double %x, double 4.0) + ret double %pow +} + +define double @test_pow_afn_f64_nnan_ninf__y_4(double %x) { +; CHECK-LABEL: define double @test_pow_afn_f64_nnan_ninf__y_4 +; CHECK-SAME: (double [[X:%.*]]) { +; CHECK-NEXT: [[__POWX2:%.*]] = fmul nnan ninf afn double [[X]], [[X]] +; CHECK-NEXT: [[__POWX21:%.*]] = fmul nnan ninf afn double [[__POWX2]], [[__POWX2]] +; CHECK-NEXT: ret double [[__POWX21]] +; + %pow = tail call afn nnan ninf double @_Z3powdd(double %x, double 4.0) + ret double %pow +} + +define double @test_pow_afn_f64_nnan_ninf__y_4_5(double %x) { +; CHECK-LABEL: define double @test_pow_afn_f64_nnan_ninf__y_4_5 +; CHECK-SAME: (double [[X:%.*]]) { +; CHECK-NEXT: [[POW:%.*]] = tail call nnan ninf afn double @_Z3powdd(double [[X]], double 4.500000e+00) +; CHECK-NEXT: ret double [[POW]] +; + %pow = tail call afn nnan ninf double @_Z3powdd(double %x, double 4.5) + ret double %pow +} + +define double @test_pow_afn_f64__y_5(double %x) { +; CHECK-LABEL: define double @test_pow_afn_f64__y_5 +; CHECK-SAME: (double [[X:%.*]]) { +; CHECK-NEXT: [[POW:%.*]] = tail call afn double @_Z4powndi(double [[X]], i32 5) +; CHECK-NEXT: ret double [[POW]] +; + %pow = tail call afn double @_Z3powdd(double %x, double 5.0) + ret double %pow +} + +define double @test_pow_afn_f64_nnan_ninf__y_5(double %x) { +; CHECK-LABEL: define double @test_pow_afn_f64_nnan_ninf__y_5 +; CHECK-SAME: (double [[X:%.*]]) { +; CHECK-NEXT: [[__POWX2:%.*]] = fmul nnan ninf afn double [[X]], [[X]] +; CHECK-NEXT: [[__POWX21:%.*]] = fmul nnan ninf afn double [[__POWX2]], [[__POWX2]] +; CHECK-NEXT: [[__POWPROD:%.*]] = fmul nnan ninf afn double [[X]], [[__POWX21]] +; CHECK-NEXT: ret double [[__POWPROD]] +; + %pow = tail call afn nnan ninf double @_Z3powdd(double %x, double 5.0) + ret double %pow +} + +define double @test_pow_afn_f64__y_neg5(double %x) { +; CHECK-LABEL: define double @test_pow_afn_f64__y_neg5 +; CHECK-SAME: (double [[X:%.*]]) { +; CHECK-NEXT: [[POW:%.*]] = tail call afn double @_Z4powndi(double [[X]], i32 -5) +; CHECK-NEXT: ret double [[POW]] +; + %pow = tail call afn double @_Z3powdd(double %x, double -5.0) + ret double %pow +} + +define double @test_pow_afn_f64_nnan_ninf__y_neg5(double %x) { +; CHECK-LABEL: define double @test_pow_afn_f64_nnan_ninf__y_neg5 +; CHECK-SAME: (double [[X:%.*]]) { +; CHECK-NEXT: [[__POWX2:%.*]] = fmul nnan ninf afn double [[X]], [[X]] +; CHECK-NEXT: [[__POWX21:%.*]] = fmul nnan ninf afn double [[__POWX2]], [[__POWX2]] +; CHECK-NEXT: [[__POWPROD:%.*]] = fmul nnan ninf afn double [[X]], [[__POWX21]] +; CHECK-NEXT: [[__1POWPROD:%.*]] = fdiv nnan ninf afn double 1.000000e+00, [[__POWPROD]] +; CHECK-NEXT: ret double [[__1POWPROD]] +; + %pow = tail call afn nnan ninf double @_Z3powdd(double %x, double -5.0) + ret double %pow +} + +define double @test_pow_afn_f64_nnan_ninf__y_10(double %x) { +; CHECK-LABEL: define double @test_pow_afn_f64_nnan_ninf__y_10 +; CHECK-SAME: (double [[X:%.*]]) { +; CHECK-NEXT: [[__POWX2:%.*]] = fmul nnan ninf afn double [[X]], [[X]] +; CHECK-NEXT: [[__POWX21:%.*]] = fmul nnan ninf afn double [[__POWX2]], [[__POWX2]] +; CHECK-NEXT: [[__POWX22:%.*]] = fmul nnan ninf afn double [[__POWX21]], [[__POWX21]] +; CHECK-NEXT: [[__POWPROD:%.*]] = fmul nnan ninf afn double 
[[__POWX2]], [[__POWX22]] +; CHECK-NEXT: ret double [[__POWPROD]] +; + %pow = tail call afn nnan ninf double @_Z3powdd(double %x, double 10.0) + ret double %pow +} + +define <2 x double> @test_pow_afn_v2f64_nnan_ninf__y_3(<2 x double> %x) { +; CHECK-LABEL: define <2 x double> @test_pow_afn_v2f64_nnan_ninf__y_3 +; CHECK-SAME: (<2 x double> [[X:%.*]]) { +; CHECK-NEXT: [[__POWX2:%.*]] = fmul nnan ninf afn <2 x double> [[X]], [[X]] +; CHECK-NEXT: [[__POWPROD:%.*]] = fmul nnan ninf afn <2 x double> [[X]], [[__POWX2]] +; CHECK-NEXT: ret <2 x double> [[__POWPROD]] +; + %pow = tail call afn nnan ninf <2 x double> @_Z3powDv2_dS_(<2 x double> %x, <2 x double> <double 3.0, double 3.0>) + ret <2 x double> %pow +} + +define <2 x double> @test_pow_afn_v2f64_nnan_ninf__y_4(<2 x double> %x) { +; CHECK-LABEL: define <2 x double> @test_pow_afn_v2f64_nnan_ninf__y_4 +; CHECK-SAME: (<2 x double> [[X:%.*]]) { +; CHECK-NEXT: [[__POWX2:%.*]] = fmul nnan ninf afn <2 x double> [[X]], [[X]] +; CHECK-NEXT: [[__POWX21:%.*]] = fmul nnan ninf afn <2 x double> [[__POWX2]], [[__POWX2]] +; CHECK-NEXT: ret <2 x double> [[__POWX21]] +; + %pow = tail call afn nnan ninf <2 x double> @_Z3powDv2_dS_(<2 x double> %x, <2 x double> <double 4.0, double 4.0>) + ret <2 x double> %pow +} + +define <2 x double> @test_pow_afn_v2f64_nnan_ninf__y_4_5(<2 x double> %x) { +; CHECK-LABEL: define <2 x double> @test_pow_afn_v2f64_nnan_ninf__y_4_5 +; CHECK-SAME: (<2 x double> [[X:%.*]]) { +; CHECK-NEXT: [[POW:%.*]] = tail call nnan ninf afn <2 x double> @_Z3powDv2_dS_(<2 x double> [[X]], <2 x double> splat (double 4.500000e+00)) +; CHECK-NEXT: ret <2 x double> [[POW]] +; + %pow = tail call afn nnan ninf <2 x double> @_Z3powDv2_dS_(<2 x double> %x, <2 x double> <double 4.5, double 4.5>) + ret <2 x double> %pow +} + +define <2 x double> @test_pow_afn_v2f64_nnan_ninf__y_5(<2 x double> %x) { +; CHECK-LABEL: define <2 x double> @test_pow_afn_v2f64_nnan_ninf__y_5 +; CHECK-SAME: (<2 x double> [[X:%.*]]) { +; CHECK-NEXT: [[__POWX2:%.*]] = fmul nnan ninf afn <2 x double> [[X]], [[X]] +; CHECK-NEXT: [[__POWX21:%.*]] = fmul nnan ninf afn <2 x double> [[__POWX2]], [[__POWX2]] +; CHECK-NEXT: [[__POWPROD:%.*]] = fmul nnan ninf afn <2 x double> [[X]], [[__POWX21]] +; CHECK-NEXT: ret <2 x double> [[__POWPROD]] +; + %pow = tail call afn nnan ninf <2 x double> @_Z3powDv2_dS_(<2 x double> %x, <2 x double> <double 5.0, double 5.0>) + ret <2 x double> %pow +} + +define half @test_pow_afn_f16__y_3(half %x) { +; CHECK-LABEL: define half @test_pow_afn_f16__y_3 +; CHECK-SAME: (half [[X:%.*]]) { +; CHECK-NEXT: [[POW:%.*]] = tail call afn half @_Z4pownDhi(half [[X]], i32 3) +; CHECK-NEXT: ret half [[POW]] +; + %pow = tail call afn half @_Z3powDhDh(half %x, half 3.0) + ret half %pow +} + +define half @test_pow_afn_f16_nnan_ninf__y_3(half %x) { +; CHECK-LABEL: define half @test_pow_afn_f16_nnan_ninf__y_3 +; CHECK-SAME: (half [[X:%.*]]) { +; CHECK-NEXT: [[__POWX2:%.*]] = fmul nnan ninf afn half [[X]], [[X]] +; CHECK-NEXT: [[__POWPROD:%.*]] = fmul nnan ninf afn half [[X]], [[__POWX2]] +; CHECK-NEXT: ret half [[__POWPROD]] +; + %pow = tail call afn nnan ninf half @_Z3powDhDh(half %x, half 3.0) + ret half %pow +} + +define half @test_pow_afn_f16__y_4(half %x) { +; CHECK-LABEL: define half @test_pow_afn_f16__y_4 +; CHECK-SAME: (half [[X:%.*]]) { +; CHECK-NEXT: [[POW:%.*]] = tail call afn half @_Z4pownDhi(half [[X]], i32 4) +; CHECK-NEXT: ret half [[POW]] +; + %pow = tail call afn half @_Z3powDhDh(half %x, half 4.0) + ret half %pow +} + +define half @test_pow_afn_f16_nnan_ninf__y_4(half %x) { +; CHECK-LABEL: define half @test_pow_afn_f16_nnan_ninf__y_4 +; CHECK-SAME: (half 
[[X:%.*]]) { +; CHECK-NEXT: [[__POWX2:%.*]] = fmul nnan ninf afn half [[X]], [[X]] +; CHECK-NEXT: [[__POWX21:%.*]] = fmul nnan ninf afn half [[__POWX2]], [[__POWX2]] +; CHECK-NEXT: ret half [[__POWX21]] +; + %pow = tail call afn nnan ninf half @_Z3powDhDh(half %x, half 4.0) + ret half %pow +} + +define half @test_pow_afn_f16_nnan_ninf__y_4_5(half %x) { +; CHECK-LABEL: define half @test_pow_afn_f16_nnan_ninf__y_4_5 +; CHECK-SAME: (half [[X:%.*]]) { +; CHECK-NEXT: [[POW:%.*]] = tail call nnan ninf afn half @_Z3powDhDh(half [[X]], half 0xH4480) +; CHECK-NEXT: ret half [[POW]] +; + %pow = tail call afn nnan ninf half @_Z3powDhDh(half %x, half 4.5) + ret half %pow +} + +define half @test_pow_afn_f16__y_5(half %x) { +; CHECK-LABEL: define half @test_pow_afn_f16__y_5 +; CHECK-SAME: (half [[X:%.*]]) { +; CHECK-NEXT: [[POW:%.*]] = tail call afn half @_Z4pownDhi(half [[X]], i32 5) +; CHECK-NEXT: ret half [[POW]] +; + %pow = tail call afn half @_Z3powDhDh(half %x, half 5.0) + ret half %pow +} + +define half @test_pow_afn_f16_nnan_ninf__y_5(half %x) { +; CHECK-LABEL: define half @test_pow_afn_f16_nnan_ninf__y_5 +; CHECK-SAME: (half [[X:%.*]]) { +; CHECK-NEXT: [[__POWX2:%.*]] = fmul nnan ninf afn half [[X]], [[X]] +; CHECK-NEXT: [[__POWX21:%.*]] = fmul nnan ninf afn half [[__POWX2]], [[__POWX2]] +; CHECK-NEXT: [[__POWPROD:%.*]] = fmul nnan ninf afn half [[X]], [[__POWX21]] +; CHECK-NEXT: ret half [[__POWPROD]] +; + %pow = tail call afn nnan ninf half @_Z3powDhDh(half %x, half 5.0) + ret half %pow +} + +define half @test_pow_afn_f16__y_neg5(half %x) { +; CHECK-LABEL: define half @test_pow_afn_f16__y_neg5 +; CHECK-SAME: (half [[X:%.*]]) { +; CHECK-NEXT: [[POW:%.*]] = tail call afn half @_Z4pownDhi(half [[X]], i32 -5) +; CHECK-NEXT: ret half [[POW]] +; + %pow = tail call afn half @_Z3powDhDh(half %x, half -5.0) + ret half %pow +} + +define half @test_pow_afn_f16_nnan_ninf__y_neg5(half %x) { +; CHECK-LABEL: define half @test_pow_afn_f16_nnan_ninf__y_neg5 +; CHECK-SAME: (half [[X:%.*]]) { +; CHECK-NEXT: [[__POWX2:%.*]] = fmul nnan ninf afn half [[X]], [[X]] +; CHECK-NEXT: [[__POWX21:%.*]] = fmul nnan ninf afn half [[__POWX2]], [[__POWX2]] +; CHECK-NEXT: [[__POWPROD:%.*]] = fmul nnan ninf afn half [[X]], [[__POWX21]] +; CHECK-NEXT: [[__1POWPROD:%.*]] = fdiv nnan ninf afn half 0xH3C00, [[__POWPROD]] +; CHECK-NEXT: ret half [[__1POWPROD]] +; + %pow = tail call afn nnan ninf half @_Z3powDhDh(half %x, half -5.0) + ret half %pow +} + +define half @test_pow_afn_f16_nnan_ninf__y_10(half %x) { +; CHECK-LABEL: define half @test_pow_afn_f16_nnan_ninf__y_10 +; CHECK-SAME: (half [[X:%.*]]) { +; CHECK-NEXT: [[__POWX2:%.*]] = fmul nnan ninf afn half [[X]], [[X]] +; CHECK-NEXT: [[__POWX21:%.*]] = fmul nnan ninf afn half [[__POWX2]], [[__POWX2]] +; CHECK-NEXT: [[__POWX22:%.*]] = fmul nnan ninf afn half [[__POWX21]], [[__POWX21]] +; CHECK-NEXT: [[__POWPROD:%.*]] = fmul nnan ninf afn half [[__POWX2]], [[__POWX22]] +; CHECK-NEXT: ret half [[__POWPROD]] +; + %pow = tail call afn nnan ninf half @_Z3powDhDh(half %x, half 10.0) + ret half %pow +} + +define <2 x half> @test_pow_afn_v2f16_nnan_ninf__y_3(<2 x half> %x) { +; CHECK-LABEL: define <2 x half> @test_pow_afn_v2f16_nnan_ninf__y_3 +; CHECK-SAME: (<2 x half> [[X:%.*]]) { +; CHECK-NEXT: [[__POWX2:%.*]] = fmul nnan ninf afn <2 x half> [[X]], [[X]] +; CHECK-NEXT: [[__POWPROD:%.*]] = fmul nnan ninf afn <2 x half> [[X]], [[__POWX2]] +; CHECK-NEXT: ret <2 x half> [[__POWPROD]] +; + %pow = tail call afn nnan ninf <2 x half> @_Z3powDv2_DhS_(<2 x half> %x, <2 x half> <half 3.0, half 3.0>) + ret <2 x half> 
%pow +} + +define <2 x half> @test_pow_afn_v2f16_nnan_ninf__y_4(<2 x half> %x) { +; CHECK-LABEL: define <2 x half> @test_pow_afn_v2f16_nnan_ninf__y_4 +; CHECK-SAME: (<2 x half> [[X:%.*]]) { +; CHECK-NEXT: [[__POWX2:%.*]] = fmul nnan ninf afn <2 x half> [[X]], [[X]] +; CHECK-NEXT: [[__POWX21:%.*]] = fmul nnan ninf afn <2 x half> [[__POWX2]], [[__POWX2]] +; CHECK-NEXT: ret <2 x half> [[__POWX21]] +; + %pow = tail call afn nnan ninf <2 x half> @_Z3powDv2_DhS_(<2 x half> %x, <2 x half> <half 4.0, half 4.0>) + ret <2 x half> %pow +} + +define <2 x half> @test_pow_afn_v2f16_nnan_ninf__y_4_5(<2 x half> %x) { +; CHECK-LABEL: define <2 x half> @test_pow_afn_v2f16_nnan_ninf__y_4_5 +; CHECK-SAME: (<2 x half> [[X:%.*]]) { +; CHECK-NEXT: [[POW:%.*]] = tail call nnan ninf afn <2 x half> @_Z3powDv2_DhS_(<2 x half> [[X]], <2 x half> splat (half 0xH4480)) +; CHECK-NEXT: ret <2 x half> [[POW]] +; + %pow = tail call afn nnan ninf <2 x half> @_Z3powDv2_DhS_(<2 x half> %x, <2 x half> <half 4.5, half 4.5>) + ret <2 x half> %pow +} + +define <2 x half> @test_pow_afn_v2f16_nnan_ninf__y_5(<2 x half> %x) { +; CHECK-LABEL: define <2 x half> @test_pow_afn_v2f16_nnan_ninf__y_5 +; CHECK-SAME: (<2 x half> [[X:%.*]]) { +; CHECK-NEXT: [[__POWX2:%.*]] = fmul nnan ninf afn <2 x half> [[X]], [[X]] +; CHECK-NEXT: [[__POWX21:%.*]] = fmul nnan ninf afn <2 x half> [[__POWX2]], [[__POWX2]] +; CHECK-NEXT: [[__POWPROD:%.*]] = fmul nnan ninf afn <2 x half> [[X]], [[__POWX21]] +; CHECK-NEXT: ret <2 x half> [[__POWPROD]] +; + %pow = tail call afn nnan ninf <2 x half> @_Z3powDv2_DhS_(<2 x half> %x, <2 x half> <half 5.0, half 5.0>) + ret <2 x half> %pow +} + +define float @test_pow_f32_known_integral_sitofp(float %x, i32 %y) { +; CHECK-LABEL: define float @test_pow_f32_known_integral_sitofp +; CHECK-SAME: (float [[X:%.*]], i32 [[Y:%.*]]) { +; CHECK-NEXT: [[Y_CAST:%.*]] = sitofp i32 [[Y]] to float +; CHECK-NEXT: [[TMP1:%.*]] = fptosi float [[Y_CAST]] to i32 +; CHECK-NEXT: [[POW:%.*]] = tail call float @_Z4pownfi(float [[X]], i32 [[TMP1]]) +; CHECK-NEXT: ret float [[POW]] +; + %y.cast = sitofp i32 %y to float + %pow = tail call float @_Z3powff(float %x, float %y.cast) + ret float %pow +} + +define float @test_pow_afn_f32_known_integral_sitofp(float %x, i32 %y) { +; CHECK-LABEL: define float @test_pow_afn_f32_known_integral_sitofp +; CHECK-SAME: (float [[X:%.*]], i32 [[Y:%.*]]) { +; CHECK-NEXT: [[Y_CAST:%.*]] = sitofp i32 [[Y]] to float +; CHECK-NEXT: [[TMP1:%.*]] = fptosi float [[Y_CAST]] to i32 +; CHECK-NEXT: [[POW:%.*]] = tail call afn float @_Z4pownfi(float [[X]], i32 [[TMP1]]) +; CHECK-NEXT: ret float [[POW]] +; + %y.cast = sitofp i32 %y to float + %pow = tail call afn float @_Z3powff(float %x, float %y.cast) + ret float %pow +} + +define float @test_pow_afn_nnan_ninf_f32_known_integral_sitofp(float %x, i32 %y) { +; CHECK-LABEL: define float @test_pow_afn_nnan_ninf_f32_known_integral_sitofp +; CHECK-SAME: (float [[X:%.*]], i32 [[Y:%.*]]) { +; CHECK-NEXT: [[Y_CAST:%.*]] = sitofp i32 [[Y]] to float +; CHECK-NEXT: [[TMP1:%.*]] = fptosi float [[Y_CAST]] to i32 +; CHECK-NEXT: [[__FABS:%.*]] = call nnan ninf afn float @llvm.fabs.f32(float [[X]]) +; CHECK-NEXT: [[__LOG2:%.*]] = call nnan ninf afn float @llvm.log2.f32(float [[__FABS]]) +; CHECK-NEXT: [[POWNI2F:%.*]] = sitofp i32 [[TMP1]] to float +; CHECK-NEXT: [[__YLOGX:%.*]] = fmul nnan ninf afn float [[__LOG2]], [[POWNI2F]] +; CHECK-NEXT: [[__EXP2:%.*]] = call nnan ninf afn float @llvm.exp2.f32(float [[__YLOGX]]) +; CHECK-NEXT: [[__YEVEN:%.*]] = shl i32 [[TMP1]], 31 +; CHECK-NEXT: [[TMP2:%.*]] = bitcast float [[X]] to i32 +; CHECK-NEXT: 
[[__POW_SIGN:%.*]] = and i32 [[__YEVEN]], [[TMP2]] +; CHECK-NEXT: [[TMP3:%.*]] = bitcast float [[__EXP2]] to i32 +; CHECK-NEXT: [[TMP4:%.*]] = or disjoint i32 [[__POW_SIGN]], [[TMP3]] +; CHECK-NEXT: [[TMP5:%.*]] = bitcast i32 [[TMP4]] to float +; CHECK-NEXT: ret float [[TMP5]] +; + %y.cast = sitofp i32 %y to float + %pow = tail call afn nnan ninf float @_Z3powff(float %x, float %y.cast) + ret float %pow +} + +define float @test_pow_afn_nnan_f32_known_integral_sitofp(float %x, i32 %y) { +; CHECK-LABEL: define float @test_pow_afn_nnan_f32_known_integral_sitofp +; CHECK-SAME: (float [[X:%.*]], i32 [[Y:%.*]]) { +; CHECK-NEXT: [[Y_CAST:%.*]] = sitofp i32 [[Y]] to float +; CHECK-NEXT: [[TMP1:%.*]] = fptosi float [[Y_CAST]] to i32 +; CHECK-NEXT: [[POW:%.*]] = tail call nnan afn float @_Z4pownfi(float [[X]], i32 [[TMP1]]) +; CHECK-NEXT: ret float [[POW]] +; + %y.cast = sitofp i32 %y to float + %pow = tail call afn nnan float @_Z3powff(float %x, float %y.cast) + ret float %pow +} + +define float @test_pow_afn_ninf_f32_known_integral_sitofp(float %x, i32 %y) { +; CHECK-LABEL: define float @test_pow_afn_ninf_f32_known_integral_sitofp +; CHECK-SAME: (float [[X:%.*]], i32 [[Y:%.*]]) { +; CHECK-NEXT: [[Y_CAST:%.*]] = sitofp i32 [[Y]] to float +; CHECK-NEXT: [[TMP1:%.*]] = fptosi float [[Y_CAST]] to i32 +; CHECK-NEXT: [[POW:%.*]] = tail call ninf afn float @_Z4pownfi(float [[X]], i32 [[TMP1]]) +; CHECK-NEXT: ret float [[POW]] +; + %y.cast = sitofp i32 %y to float + %pow = tail call afn ninf float @_Z3powff(float %x, float %y.cast) + ret float %pow +} + +define float @test_pow_afn_f32_known_integral_sitofp_finite_argument(float %x, i32 %y) { +; CHECK-LABEL: define float @test_pow_afn_f32_known_integral_sitofp_finite_argument +; CHECK-SAME: (float [[X:%.*]], i32 [[Y:%.*]]) { +; CHECK-NEXT: [[Y_CAST:%.*]] = sitofp i32 [[Y]] to float +; CHECK-NEXT: [[TMP1:%.*]] = fptosi float [[Y_CAST]] to i32 +; CHECK-NEXT: [[POW:%.*]] = tail call float @_Z4pownfi(float [[X]], i32 [[TMP1]]) +; CHECK-NEXT: ret float [[POW]] +; + %y.cast = sitofp i32 %y to float + %pow = tail call float @_Z3powff(float %x, float nofpclass(inf nan) %y.cast) + ret float %pow +} + +define float @test_pow_f32_known_integral_uitofp(float %x, i32 %y) { +; CHECK-LABEL: define float @test_pow_f32_known_integral_uitofp +; CHECK-SAME: (float [[X:%.*]], i32 [[Y:%.*]]) { +; CHECK-NEXT: [[Y_CAST:%.*]] = uitofp i32 [[Y]] to float +; CHECK-NEXT: [[TMP1:%.*]] = fptosi float [[Y_CAST]] to i32 +; CHECK-NEXT: [[POW:%.*]] = tail call float @_Z4pownfi(float [[X]], i32 [[TMP1]]) +; CHECK-NEXT: ret float [[POW]] +; + %y.cast = uitofp i32 %y to float + %pow = tail call float @_Z3powff(float %x, float %y.cast) + ret float %pow +} + +define float @test_pow_afn_f32_known_integral_uitofp(float %x, i32 %y) { +; CHECK-LABEL: define float @test_pow_afn_f32_known_integral_uitofp +; CHECK-SAME: (float [[X:%.*]], i32 [[Y:%.*]]) { +; CHECK-NEXT: [[Y_CAST:%.*]] = uitofp i32 [[Y]] to float +; CHECK-NEXT: [[TMP1:%.*]] = fptosi float [[Y_CAST]] to i32 +; CHECK-NEXT: [[POW:%.*]] = tail call afn float @_Z4pownfi(float [[X]], i32 [[TMP1]]) +; CHECK-NEXT: ret float [[POW]] +; + %y.cast = uitofp i32 %y to float + %pow = tail call afn float @_Z3powff(float %x, float %y.cast) + ret float %pow +} + +define float @test_pow_afn_nnan_ninf_f32_known_integral_uitofp(float %x, i32 %y) { +; CHECK-LABEL: define float @test_pow_afn_nnan_ninf_f32_known_integral_uitofp +; CHECK-SAME: (float [[X:%.*]], i32 [[Y:%.*]]) { +; CHECK-NEXT: [[Y_CAST:%.*]] = uitofp i32 [[Y]] to float +; CHECK-NEXT: 
[[TMP1:%.*]] = fptosi float [[Y_CAST]] to i32 +; CHECK-NEXT: [[__FABS:%.*]] = call nnan ninf afn float @llvm.fabs.f32(float [[X]]) +; CHECK-NEXT: [[__LOG2:%.*]] = call nnan ninf afn float @llvm.log2.f32(float [[__FABS]]) +; CHECK-NEXT: [[POWNI2F:%.*]] = sitofp i32 [[TMP1]] to float +; CHECK-NEXT: [[__YLOGX:%.*]] = fmul nnan ninf afn float [[__LOG2]], [[POWNI2F]] +; CHECK-NEXT: [[__EXP2:%.*]] = call nnan ninf afn float @llvm.exp2.f32(float [[__YLOGX]]) +; CHECK-NEXT: [[__YEVEN:%.*]] = shl i32 [[TMP1]], 31 +; CHECK-NEXT: [[TMP2:%.*]] = bitcast float [[X]] to i32 +; CHECK-NEXT: [[__POW_SIGN:%.*]] = and i32 [[__YEVEN]], [[TMP2]] +; CHECK-NEXT: [[TMP3:%.*]] = bitcast float [[__EXP2]] to i32 +; CHECK-NEXT: [[TMP4:%.*]] = or disjoint i32 [[__POW_SIGN]], [[TMP3]] +; CHECK-NEXT: [[TMP5:%.*]] = bitcast i32 [[TMP4]] to float +; CHECK-NEXT: ret float [[TMP5]] +; + %y.cast = uitofp i32 %y to float + %pow = tail call afn nnan ninf float @_Z3powff(float %x, float %y.cast) + ret float %pow +} + +; cast from i256 may produce infinity so can't assume integer without ninf +define float @test_pow_afn_nnan_f32_known_integral_uitofp_i256(float %x, i256 %y) { +; CHECK-LABEL: define float @test_pow_afn_nnan_f32_known_integral_uitofp_i256 +; CHECK-SAME: (float [[X:%.*]], i256 [[Y:%.*]]) { +; CHECK-NEXT: [[Y_CAST:%.*]] = uitofp i256 [[Y]] to float +; CHECK-NEXT: [[POW:%.*]] = tail call nnan afn float @_Z3powff(float [[X]], float [[Y_CAST]]) +; CHECK-NEXT: ret float [[POW]] +; + %y.cast = uitofp i256 %y to float + %pow = tail call afn nnan float @_Z3powff(float %x, float %y.cast) + ret float %pow +} + +; cast from i256 may produce infinity so can't assume integer without ninf +define float @test_pow_afn_nnan_f32_known_integral_sitofp_i256(float %x, i256 %y) { +; CHECK-LABEL: define float @test_pow_afn_nnan_f32_known_integral_sitofp_i256 +; CHECK-SAME: (float [[X:%.*]], i256 [[Y:%.*]]) { +; CHECK-NEXT: [[Y_CAST:%.*]] = sitofp i256 [[Y]] to float +; CHECK-NEXT: [[POW:%.*]] = tail call nnan afn float @_Z3powff(float [[X]], float [[Y_CAST]]) +; CHECK-NEXT: ret float [[POW]] +; + %y.cast = sitofp i256 %y to float + %pow = tail call afn nnan float @_Z3powff(float %x, float %y.cast) + ret float %pow +} + +define float @test_pow_afn_nnan_ninf_f32_known_integral_uitofp_i256(float %x, i256 %y) { +; CHECK-LABEL: define float @test_pow_afn_nnan_ninf_f32_known_integral_uitofp_i256 +; CHECK-SAME: (float [[X:%.*]], i256 [[Y:%.*]]) { +; CHECK-NEXT: [[Y_CAST:%.*]] = uitofp i256 [[Y]] to float +; CHECK-NEXT: [[TMP1:%.*]] = fptosi float [[Y_CAST]] to i32 +; CHECK-NEXT: [[__FABS:%.*]] = call nnan ninf afn float @llvm.fabs.f32(float [[X]]) +; CHECK-NEXT: [[__LOG2:%.*]] = call nnan ninf afn float @llvm.log2.f32(float [[__FABS]]) +; CHECK-NEXT: [[POWNI2F:%.*]] = sitofp i32 [[TMP1]] to float +; CHECK-NEXT: [[__YLOGX:%.*]] = fmul nnan ninf afn float [[__LOG2]], [[POWNI2F]] +; CHECK-NEXT: [[__EXP2:%.*]] = call nnan ninf afn float @llvm.exp2.f32(float [[__YLOGX]]) +; CHECK-NEXT: [[__YEVEN:%.*]] = shl i32 [[TMP1]], 31 +; CHECK-NEXT: [[TMP2:%.*]] = bitcast float [[X]] to i32 +; CHECK-NEXT: [[__POW_SIGN:%.*]] = and i32 [[__YEVEN]], [[TMP2]] +; CHECK-NEXT: [[TMP3:%.*]] = bitcast float [[__EXP2]] to i32 +; CHECK-NEXT: [[TMP4:%.*]] = or disjoint i32 [[__POW_SIGN]], [[TMP3]] +; CHECK-NEXT: [[TMP5:%.*]] = bitcast i32 [[TMP4]] to float +; CHECK-NEXT: ret float [[TMP5]] +; + %y.cast = uitofp i256 %y to float + %pow = tail call afn nnan ninf float @_Z3powff(float %x, float %y.cast) + ret float %pow +} + +define float 
@test_pow_afn_nnan_ninf_f32_known_integral_sitofp_i256(float %x, i256 %y) { +; CHECK-LABEL: define float @test_pow_afn_nnan_ninf_f32_known_integral_sitofp_i256 +; CHECK-SAME: (float [[X:%.*]], i256 [[Y:%.*]]) { +; CHECK-NEXT: [[Y_CAST:%.*]] = sitofp i256 [[Y]] to float +; CHECK-NEXT: [[TMP1:%.*]] = fptosi float [[Y_CAST]] to i32 +; CHECK-NEXT: [[__FABS:%.*]] = call nnan ninf afn float @llvm.fabs.f32(float [[X]]) +; CHECK-NEXT: [[__LOG2:%.*]] = call nnan ninf afn float @llvm.log2.f32(float [[__FABS]]) +; CHECK-NEXT: [[POWNI2F:%.*]] = sitofp i32 [[TMP1]] to float +; CHECK-NEXT: [[__YLOGX:%.*]] = fmul nnan ninf afn float [[__LOG2]], [[POWNI2F]] +; CHECK-NEXT: [[__EXP2:%.*]] = call nnan ninf afn float @llvm.exp2.f32(float [[__YLOGX]]) +; CHECK-NEXT: [[__YEVEN:%.*]] = shl i32 [[TMP1]], 31 +; CHECK-NEXT: [[TMP2:%.*]] = bitcast float [[X]] to i32 +; CHECK-NEXT: [[__POW_SIGN:%.*]] = and i32 [[__YEVEN]], [[TMP2]] +; CHECK-NEXT: [[TMP3:%.*]] = bitcast float [[__EXP2]] to i32 +; CHECK-NEXT: [[TMP4:%.*]] = or disjoint i32 [[__POW_SIGN]], [[TMP3]] +; CHECK-NEXT: [[TMP5:%.*]] = bitcast i32 [[TMP4]] to float +; CHECK-NEXT: ret float [[TMP5]] +; + %y.cast = sitofp i256 %y to float + %pow = tail call afn nnan ninf float @_Z3powff(float %x, float %y.cast) + ret float %pow +} + +define <2 x float> @test_pow_afn_nnan_ninf_v2f32_known_integral_sitofp(<2 x float> %x, <2 x i32> %y) { +; CHECK-LABEL: define <2 x float> @test_pow_afn_nnan_ninf_v2f32_known_integral_sitofp +; CHECK-SAME: (<2 x float> [[X:%.*]], <2 x i32> [[Y:%.*]]) { +; CHECK-NEXT: [[Y_CAST:%.*]] = sitofp <2 x i32> [[Y]] to <2 x float> +; CHECK-NEXT: [[TMP1:%.*]] = fptosi <2 x float> [[Y_CAST]] to <2 x i32> +; CHECK-NEXT: [[__FABS:%.*]] = call nnan ninf afn <2 x float> @llvm.fabs.v2f32(<2 x float> [[X]]) +; CHECK-NEXT: [[__LOG2:%.*]] = call nnan ninf afn <2 x float> @llvm.log2.v2f32(<2 x float> [[__FABS]]) +; CHECK-NEXT: [[POWNI2F:%.*]] = sitofp <2 x i32> [[TMP1]] to <2 x float> +; CHECK-NEXT: [[__YLOGX:%.*]] = fmul nnan ninf afn <2 x float> [[__LOG2]], [[POWNI2F]] +; CHECK-NEXT: [[__EXP2:%.*]] = call nnan ninf afn <2 x float> @llvm.exp2.v2f32(<2 x float> [[__YLOGX]]) +; CHECK-NEXT: [[__YEVEN:%.*]] = shl <2 x i32> [[TMP1]], splat (i32 31) +; CHECK-NEXT: [[TMP2:%.*]] = bitcast <2 x float> [[X]] to <2 x i32> +; CHECK-NEXT: [[__POW_SIGN:%.*]] = and <2 x i32> [[__YEVEN]], [[TMP2]] +; CHECK-NEXT: [[TMP3:%.*]] = bitcast <2 x float> [[__EXP2]] to <2 x i32> +; CHECK-NEXT: [[TMP4:%.*]] = or disjoint <2 x i32> [[__POW_SIGN]], [[TMP3]] +; CHECK-NEXT: [[TMP5:%.*]] = bitcast <2 x i32> [[TMP4]] to <2 x float> +; CHECK-NEXT: ret <2 x float> [[TMP5]] +; + %y.cast = sitofp <2 x i32> %y to <2 x float> + %pow = tail call afn nnan ninf <2 x float> @_Z3powDv2_fS_(<2 x float> %x, <2 x float> %y.cast) + ret <2 x float> %pow +} + +define <2 x float> @test_pow_v2f32_known_integral_uitofp(<2 x float> %x, <2 x i32> %y) { +; CHECK-LABEL: define <2 x float> @test_pow_v2f32_known_integral_uitofp +; CHECK-SAME: (<2 x float> [[X:%.*]], <2 x i32> [[Y:%.*]]) { +; CHECK-NEXT: [[Y_CAST:%.*]] = uitofp <2 x i32> [[Y]] to <2 x float> +; CHECK-NEXT: [[TMP1:%.*]] = fptosi <2 x float> [[Y_CAST]] to <2 x i32> +; CHECK-NEXT: [[POW:%.*]] = tail call <2 x float> @_Z4pownDv2_fDv2_i(<2 x float> [[X]], <2 x i32> [[TMP1]]) +; CHECK-NEXT: ret <2 x float> [[POW]] +; + %y.cast = uitofp <2 x i32> %y to <2 x float> + %pow = tail call <2 x float> @_Z3powDv2_fS_(<2 x float> %x, <2 x float> %y.cast) + ret <2 x float> %pow +} + +define <2 x float> @test_pow_afn_v2f32_known_integral_uitofp(<2 x float> %x, <2 
x i32> %y) { +; CHECK-LABEL: define <2 x float> @test_pow_afn_v2f32_known_integral_uitofp +; CHECK-SAME: (<2 x float> [[X:%.*]], <2 x i32> [[Y:%.*]]) { +; CHECK-NEXT: [[Y_CAST:%.*]] = uitofp <2 x i32> [[Y]] to <2 x float> +; CHECK-NEXT: [[TMP1:%.*]] = fptosi <2 x float> [[Y_CAST]] to <2 x i32> +; CHECK-NEXT: [[POW:%.*]] = tail call afn <2 x float> @_Z4pownDv2_fDv2_i(<2 x float> [[X]], <2 x i32> [[TMP1]]) +; CHECK-NEXT: ret <2 x float> [[POW]] +; + %y.cast = uitofp <2 x i32> %y to <2 x float> + %pow = tail call afn <2 x float> @_Z3powDv2_fS_(<2 x float> %x, <2 x float> %y.cast) + ret <2 x float> %pow +} + +define <2 x float> @test_pow_afn_nnan_ninf_v2f32_known_integral_uitofp(<2 x float> %x, <2 x i32> %y) { +; CHECK-LABEL: define <2 x float> @test_pow_afn_nnan_ninf_v2f32_known_integral_uitofp +; CHECK-SAME: (<2 x float> [[X:%.*]], <2 x i32> [[Y:%.*]]) { +; CHECK-NEXT: [[Y_CAST:%.*]] = uitofp <2 x i32> [[Y]] to <2 x float> +; CHECK-NEXT: [[TMP1:%.*]] = fptosi <2 x float> [[Y_CAST]] to <2 x i32> +; CHECK-NEXT: [[__FABS:%.*]] = call nnan ninf afn <2 x float> @llvm.fabs.v2f32(<2 x float> [[X]]) +; CHECK-NEXT: [[__LOG2:%.*]] = call nnan ninf afn <2 x float> @llvm.log2.v2f32(<2 x float> [[__FABS]]) +; CHECK-NEXT: [[POWNI2F:%.*]] = sitofp <2 x i32> [[TMP1]] to <2 x float> +; CHECK-NEXT: [[__YLOGX:%.*]] = fmul nnan ninf afn <2 x float> [[__LOG2]], [[POWNI2F]] +; CHECK-NEXT: [[__EXP2:%.*]] = call nnan ninf afn <2 x float> @llvm.exp2.v2f32(<2 x float> [[__YLOGX]]) +; CHECK-NEXT: [[__YEVEN:%.*]] = shl <2 x i32> [[TMP1]], splat (i32 31) +; CHECK-NEXT: [[TMP2:%.*]] = bitcast <2 x float> [[X]] to <2 x i32> +; CHECK-NEXT: [[__POW_SIGN:%.*]] = and <2 x i32> [[__YEVEN]], [[TMP2]] +; CHECK-NEXT: [[TMP3:%.*]] = bitcast <2 x float> [[__EXP2]] to <2 x i32> +; CHECK-NEXT: [[TMP4:%.*]] = or disjoint <2 x i32> [[__POW_SIGN]], [[TMP3]] +; CHECK-NEXT: [[TMP5:%.*]] = bitcast <2 x i32> [[TMP4]] to <2 x float> +; CHECK-NEXT: ret <2 x float> [[TMP5]] +; + %y.cast = uitofp <2 x i32> %y to <2 x float> + %pow = tail call afn nnan ninf <2 x float> @_Z3powDv2_fS_(<2 x float> %x, <2 x float> %y.cast) + ret <2 x float> %pow +} + +; Could fold to powr or pown +define float @test_pow_f32_known_positive_x__known_integral_sitofp(float nofpclass(ninf nsub nnorm) %x, i32 %y) { +; CHECK-LABEL: define float @test_pow_f32_known_positive_x__known_integral_sitofp +; CHECK-SAME: (float nofpclass(ninf nsub nnorm) [[X:%.*]], i32 [[Y:%.*]]) { +; CHECK-NEXT: [[Y_CAST:%.*]] = sitofp i32 [[Y]] to float +; CHECK-NEXT: [[POW:%.*]] = tail call float @_Z4powrff(float [[X]], float [[Y_CAST]]) +; CHECK-NEXT: ret float [[POW]] +; + %y.cast = sitofp i32 %y to float + %pow = tail call float @_Z3powff(float %x, float %y.cast) + ret float %pow +} + +define float @test_pow_afn_f32_known_positive_x__known_integral_sitofp(float nofpclass(ninf nsub nnorm) %x, i32 %y) { +; CHECK-LABEL: define float @test_pow_afn_f32_known_positive_x__known_integral_sitofp +; CHECK-SAME: (float nofpclass(ninf nsub nnorm) [[X:%.*]], i32 [[Y:%.*]]) { +; CHECK-NEXT: [[Y_CAST:%.*]] = sitofp i32 [[Y]] to float +; CHECK-NEXT: [[POW:%.*]] = tail call afn float @_Z4powrff(float [[X]], float [[Y_CAST]]) +; CHECK-NEXT: ret float [[POW]] +; + %y.cast = sitofp i32 %y to float + %pow = tail call afn float @_Z3powff(float %x, float %y.cast) + ret float %pow +} + +define float @test_pow_afn_nnan_ninf_f32__known_positive_x__known_integral_sitofp(float nofpclass(ninf nsub nnorm) %x, i32 %y) { +; CHECK-LABEL: define float @test_pow_afn_nnan_ninf_f32__known_positive_x__known_integral_sitofp +; 
CHECK-SAME: (float nofpclass(ninf nsub nnorm) [[X:%.*]], i32 [[Y:%.*]]) { +; CHECK-NEXT: [[Y_CAST:%.*]] = sitofp i32 [[Y]] to float +; CHECK-NEXT: [[__LOG2:%.*]] = call nnan ninf afn float @llvm.log2.f32(float [[X]]) +; CHECK-NEXT: [[__YLOGX:%.*]] = fmul nnan ninf afn float [[__LOG2]], [[Y_CAST]] +; CHECK-NEXT: [[__EXP2:%.*]] = call nnan ninf afn float @llvm.exp2.f32(float [[__YLOGX]]) +; CHECK-NEXT: ret float [[__EXP2]] +; + %y.cast = sitofp i32 %y to float + %pow = tail call afn nnan ninf float @_Z3powff(float %x, float %y.cast) + ret float %pow +} + +define float @test_pow_f32__y_known_integral_trunc_maybe_inf(float %x, float nofpclass(nan) %y.arg) { +; CHECK-LABEL: define float @test_pow_f32__y_known_integral_trunc_maybe_inf +; CHECK-SAME: (float [[X:%.*]], float nofpclass(nan) [[Y_ARG:%.*]]) { +; CHECK-NEXT: [[Y:%.*]] = call float @llvm.trunc.f32(float [[Y_ARG]]) +; CHECK-NEXT: [[POW:%.*]] = tail call float @_Z3powff(float [[X]], float [[Y]]) +; CHECK-NEXT: ret float [[POW]] +; + %y = call float @llvm.trunc.f32(float %y.arg) + %pow = tail call float @_Z3powff(float %x, float %y) + ret float %pow +} + +define float @test_pow_f32__y_known_integral_trunc_maybe_nan(float %x, float nofpclass(inf) %y.arg) { +; CHECK-LABEL: define float @test_pow_f32__y_known_integral_trunc_maybe_nan +; CHECK-SAME: (float [[X:%.*]], float nofpclass(inf) [[Y_ARG:%.*]]) { +; CHECK-NEXT: [[Y:%.*]] = call float @llvm.trunc.f32(float [[Y_ARG]]) +; CHECK-NEXT: [[POW:%.*]] = tail call float @_Z3powff(float [[X]], float [[Y]]) +; CHECK-NEXT: ret float [[POW]] +; + %y = call float @llvm.trunc.f32(float %y.arg) + %pow = tail call float @_Z3powff(float %x, float %y) + ret float %pow +} + +; Cannot fold to pown, may still be inf +define float @test_pow_f32__y_known_integral_trunc_nnan_use(float %x, float %y.arg) { +; CHECK-LABEL: define float @test_pow_f32__y_known_integral_trunc_nnan_use +; CHECK-SAME: (float [[X:%.*]], float [[Y_ARG:%.*]]) { +; CHECK-NEXT: [[Y:%.*]] = call float @llvm.trunc.f32(float [[Y_ARG]]) +; CHECK-NEXT: [[POW:%.*]] = tail call nnan float @_Z3powff(float [[X]], float [[Y]]) +; CHECK-NEXT: ret float [[POW]] +; + %y = call float @llvm.trunc.f32(float %y.arg) + %pow = tail call nnan float @_Z3powff(float %x, float %y) + ret float %pow +} + +; Cannot fold to pown, may still be nan +define float @test_pow_f32__y_known_integral_trunc_ninf_use(float %x, float %y.arg) { +; CHECK-LABEL: define float @test_pow_f32__y_known_integral_trunc_ninf_use +; CHECK-SAME: (float [[X:%.*]], float [[Y_ARG:%.*]]) { +; CHECK-NEXT: [[Y:%.*]] = call float @llvm.trunc.f32(float [[Y_ARG]]) +; CHECK-NEXT: [[POW:%.*]] = tail call ninf float @_Z3powff(float [[X]], float [[Y]]) +; CHECK-NEXT: ret float [[POW]] +; + %y = call float @llvm.trunc.f32(float %y.arg) + %pow = tail call ninf float @_Z3powff(float %x, float %y) + ret float %pow +} + +define float @test_pow_afn_f32_nnan_ninf__y_known_integral_trunc(float %x, float %y.arg) { +; CHECK-LABEL: define float @test_pow_afn_f32_nnan_ninf__y_known_integral_trunc +; CHECK-SAME: (float [[X:%.*]], float [[Y_ARG:%.*]]) { +; CHECK-NEXT: [[Y:%.*]] = call float @llvm.trunc.f32(float [[Y_ARG]]) +; CHECK-NEXT: [[TMP1:%.*]] = fptosi float [[Y]] to i32 +; CHECK-NEXT: [[__FABS:%.*]] = call nnan ninf afn float @llvm.fabs.f32(float [[X]]) +; CHECK-NEXT: [[__LOG2:%.*]] = call nnan ninf afn float @llvm.log2.f32(float [[__FABS]]) +; CHECK-NEXT: [[POWNI2F:%.*]] = sitofp i32 [[TMP1]] to float +; CHECK-NEXT: [[__YLOGX:%.*]] = fmul nnan ninf afn float [[__LOG2]], [[POWNI2F]] +; CHECK-NEXT: 
[[__EXP2:%.*]] = call nnan ninf afn float @llvm.exp2.f32(float [[__YLOGX]]) +; CHECK-NEXT: [[__YEVEN:%.*]] = shl i32 [[TMP1]], 31 +; CHECK-NEXT: [[TMP2:%.*]] = bitcast float [[X]] to i32 +; CHECK-NEXT: [[__POW_SIGN:%.*]] = and i32 [[__YEVEN]], [[TMP2]] +; CHECK-NEXT: [[TMP3:%.*]] = bitcast float [[__EXP2]] to i32 +; CHECK-NEXT: [[TMP4:%.*]] = or disjoint i32 [[__POW_SIGN]], [[TMP3]] +; CHECK-NEXT: [[TMP5:%.*]] = bitcast i32 [[TMP4]] to float +; CHECK-NEXT: ret float [[TMP5]] +; + %y = call float @llvm.trunc.f32(float %y.arg) + %pow = tail call afn nnan ninf float @_Z3powff(float %x, float %y) + ret float %pow +} + +define float @test_pow_afn_f32__y_known_integral_trunc(float %x, float nofpclass(inf nan) %y.arg) { +; CHECK-LABEL: define float @test_pow_afn_f32__y_known_integral_trunc +; CHECK-SAME: (float [[X:%.*]], float nofpclass(nan inf) [[Y_ARG:%.*]]) { +; CHECK-NEXT: [[Y:%.*]] = call float @llvm.trunc.f32(float [[Y_ARG]]) +; CHECK-NEXT: [[TMP1:%.*]] = fptosi float [[Y]] to i32 +; CHECK-NEXT: [[POW:%.*]] = tail call afn float @_Z4pownfi(float [[X]], i32 [[TMP1]]) +; CHECK-NEXT: ret float [[POW]] +; + %y = call float @llvm.trunc.f32(float %y.arg) + %pow = tail call afn float @_Z3powff(float %x, float %y) + ret float %pow +} + +define float @test_pow_f32__y_known_integral_floor(float %x, float nofpclass(inf nan) %y.arg) { +; CHECK-LABEL: define float @test_pow_f32__y_known_integral_floor +; CHECK-SAME: (float [[X:%.*]], float nofpclass(nan inf) [[Y_ARG:%.*]]) { +; CHECK-NEXT: [[Y:%.*]] = call float @llvm.floor.f32(float [[Y_ARG]]) +; CHECK-NEXT: [[TMP1:%.*]] = fptosi float [[Y]] to i32 +; CHECK-NEXT: [[POW:%.*]] = tail call float @_Z4pownfi(float [[X]], i32 [[TMP1]]) +; CHECK-NEXT: ret float [[POW]] +; + %y = call float @llvm.floor.f32(float %y.arg) + %pow = tail call float @_Z3powff(float %x, float %y) + ret float %pow +} + +define float @test_pow_f32__y_known_integral_ceil(float %x, float nofpclass(inf nan) %y.arg) { +; CHECK-LABEL: define float @test_pow_f32__y_known_integral_ceil +; CHECK-SAME: (float [[X:%.*]], float nofpclass(nan inf) [[Y_ARG:%.*]]) { +; CHECK-NEXT: [[Y:%.*]] = call float @llvm.ceil.f32(float [[Y_ARG]]) +; CHECK-NEXT: [[TMP1:%.*]] = fptosi float [[Y]] to i32 +; CHECK-NEXT: [[POW:%.*]] = tail call float @_Z4pownfi(float [[X]], i32 [[TMP1]]) +; CHECK-NEXT: ret float [[POW]] +; + %y = call float @llvm.ceil.f32(float %y.arg) + %pow = tail call float @_Z3powff(float %x, float %y) + ret float %pow +} + +define float @test_pow_f32__y_known_integral_trunc(float %x, float nofpclass(inf nan) %y.arg) { +; CHECK-LABEL: define float @test_pow_f32__y_known_integral_trunc +; CHECK-SAME: (float [[X:%.*]], float nofpclass(nan inf) [[Y_ARG:%.*]]) { +; CHECK-NEXT: [[Y:%.*]] = call float @llvm.trunc.f32(float [[Y_ARG]]) +; CHECK-NEXT: [[TMP1:%.*]] = fptosi float [[Y]] to i32 +; CHECK-NEXT: [[POW:%.*]] = tail call float @_Z4pownfi(float [[X]], i32 [[TMP1]]) +; CHECK-NEXT: ret float [[POW]] +; + %y = call float @llvm.trunc.f32(float %y.arg) + %pow = tail call float @_Z3powff(float %x, float %y) + ret float %pow +} + +define float @test_pow_f32__y_known_integral_rint(float %x, float nofpclass(inf nan) %y.arg) { +; CHECK-LABEL: define float @test_pow_f32__y_known_integral_rint +; CHECK-SAME: (float [[X:%.*]], float nofpclass(nan inf) [[Y_ARG:%.*]]) { +; CHECK-NEXT: [[Y:%.*]] = call float @llvm.rint.f32(float [[Y_ARG]]) +; CHECK-NEXT: [[TMP1:%.*]] = fptosi float [[Y]] to i32 +; CHECK-NEXT: [[POW:%.*]] = tail call float @_Z4pownfi(float [[X]], i32 [[TMP1]]) +; CHECK-NEXT: ret float [[POW]] 
+; + %y = call float @llvm.rint.f32(float %y.arg) + %pow = tail call float @_Z3powff(float %x, float %y) + ret float %pow +} + +define float @test_pow_f32__y_known_integral_nearbyint(float %x, float nofpclass(inf nan) %y.arg) { +; CHECK-LABEL: define float @test_pow_f32__y_known_integral_nearbyint +; CHECK-SAME: (float [[X:%.*]], float nofpclass(nan inf) [[Y_ARG:%.*]]) { +; CHECK-NEXT: [[Y:%.*]] = call float @llvm.nearbyint.f32(float [[Y_ARG]]) +; CHECK-NEXT: [[TMP1:%.*]] = fptosi float [[Y]] to i32 +; CHECK-NEXT: [[POW:%.*]] = tail call float @_Z4pownfi(float [[X]], i32 [[TMP1]]) +; CHECK-NEXT: ret float [[POW]] +; + %y = call float @llvm.nearbyint.f32(float %y.arg) + %pow = tail call float @_Z3powff(float %x, float %y) + ret float %pow +} + +define float @test_pow_f32__y_known_integral_round(float %x, float nofpclass(inf nan) %y.arg) { +; CHECK-LABEL: define float @test_pow_f32__y_known_integral_round +; CHECK-SAME: (float [[X:%.*]], float nofpclass(nan inf) [[Y_ARG:%.*]]) { +; CHECK-NEXT: [[Y:%.*]] = call float @llvm.round.f32(float [[Y_ARG]]) +; CHECK-NEXT: [[TMP1:%.*]] = fptosi float [[Y]] to i32 +; CHECK-NEXT: [[POW:%.*]] = tail call float @_Z4pownfi(float [[X]], i32 [[TMP1]]) +; CHECK-NEXT: ret float [[POW]] +; + %y = call float @llvm.round.f32(float %y.arg) + %pow = tail call float @_Z3powff(float %x, float %y) + ret float %pow +} + +define float @test_pow_f32__y_known_integral_roundeven(float %x, float nofpclass(inf nan) %y.arg) { +; CHECK-LABEL: define float @test_pow_f32__y_known_integral_roundeven +; CHECK-SAME: (float [[X:%.*]], float nofpclass(nan inf) [[Y_ARG:%.*]]) { +; CHECK-NEXT: [[Y:%.*]] = call float @llvm.roundeven.f32(float [[Y_ARG]]) +; CHECK-NEXT: [[TMP1:%.*]] = fptosi float [[Y]] to i32 +; CHECK-NEXT: [[POW:%.*]] = tail call float @_Z4pownfi(float [[X]], i32 [[TMP1]]) +; CHECK-NEXT: ret float [[POW]] +; + %y = call float @llvm.roundeven.f32(float %y.arg) + %pow = tail call float @_Z3powff(float %x, float %y) + ret float %pow +} + +define float @test_pow_f32_known_integral_undef(float %x) { +; CHECK-LABEL: define float @test_pow_f32_known_integral_undef +; CHECK-SAME: (float [[X:%.*]]) { +; CHECK-NEXT: [[POW:%.*]] = tail call float @_Z3powff(float [[X]], float undef) +; CHECK-NEXT: ret float [[POW]] +; + %pow = tail call float @_Z3powff(float %x, float undef) + ret float %pow +} + +define float @test_pow_f32_known_integral_poison(float %x) { +; CHECK-LABEL: define float @test_pow_f32_known_integral_poison +; CHECK-SAME: (float [[X:%.*]]) { +; CHECK-NEXT: [[POW:%.*]] = tail call float @_Z4pownfi(float [[X]], i32 poison) +; CHECK-NEXT: ret float [[POW]] +; + %pow = tail call float @_Z3powff(float %x, float poison) + ret float %pow +} + +define <2 x float> @test_pow_v2f32_known_integral_constant_vector_undef_elt(<2 x float> %x) { +; CHECK-LABEL: define <2 x float> @test_pow_v2f32_known_integral_constant_vector_undef_elt +; CHECK-SAME: (<2 x float> [[X:%.*]]) { +; CHECK-NEXT: [[POW:%.*]] = tail call <2 x float> @_Z3powDv2_fS_(<2 x float> [[X]], <2 x float> ) +; CHECK-NEXT: ret <2 x float> [[POW]] +; + %pow = tail call <2 x float> @_Z3powDv2_fS_(<2 x float> %x, <2 x float> ) + ret <2 x float> %pow +} + +define <2 x float> @test_pow_v2f32_known_integral_constant_vector_poison_elt(<2 x float> %x) { +; CHECK-LABEL: define <2 x float> @test_pow_v2f32_known_integral_constant_vector_poison_elt +; CHECK-SAME: (<2 x float> [[X:%.*]]) { +; CHECK-NEXT: [[POW:%.*]] = tail call <2 x float> @_Z4pownDv2_fDv2_i(<2 x float> [[X]], <2 x i32> ) +; CHECK-NEXT: ret <2 x float> [[POW]] +; + 
%pow = tail call <2 x float> @_Z3powDv2_fS_(<2 x float> %x, <2 x float> ) + ret <2 x float> %pow +} + +attributes #0 = { minsize } +attributes #1 = { noinline } +attributes #2 = { strictfp } +attributes #3 = { nobuiltin } diff --git a/llvm/test/CodeGen/AMDGPU/copysign-simplify-demanded-bits.ll b/llvm/test/CodeGen/AMDGPU/copysign-simplify-demanded-bits.ll index a01c2fa152ab3..32f6b11e41d8c 100644 --- a/llvm/test/CodeGen/AMDGPU/copysign-simplify-demanded-bits.ll +++ b/llvm/test/CodeGen/AMDGPU/copysign-simplify-demanded-bits.ll @@ -336,13 +336,13 @@ define float @test_copysign_pow_fast_f32__integral_y(float %x, i32 %y.i) { ; GFX9: ; %bb.0: ; GFX9-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) ; GFX9-NEXT: s_mov_b32 s4, 0x800000 -; GFX9-NEXT: v_cmp_lt_f32_e64 vcc, |v0|, s4 -; GFX9-NEXT: v_cndmask_b32_e64 v3, 0, 32, vcc +; GFX9-NEXT: v_cmp_ge_f32_e64 s[4:5], |v0|, s4 +; GFX9-NEXT: v_cndmask_b32_e64 v3, 32, 0, s[4:5] ; GFX9-NEXT: v_ldexp_f32 v3, |v0|, v3 ; GFX9-NEXT: v_log_f32_e32 v3, v3 ; GFX9-NEXT: v_cvt_f32_i32_e32 v1, v1 ; GFX9-NEXT: v_mov_b32_e32 v2, 0x42000000 -; GFX9-NEXT: v_cndmask_b32_e32 v2, 0, v2, vcc +; GFX9-NEXT: v_cndmask_b32_e64 v2, v2, 0, s[4:5] ; GFX9-NEXT: v_sub_f32_e32 v2, v3, v2 ; GFX9-NEXT: v_mul_f32_e32 v3, v2, v1 ; GFX9-NEXT: s_mov_b32 s4, 0xc2fc0000 diff --git a/llvm/test/CodeGen/AMDGPU/fdiv_flags.f32.ll b/llvm/test/CodeGen/AMDGPU/fdiv_flags.f32.ll index 3983655285e57..b20a45237eac5 100644 --- a/llvm/test/CodeGen/AMDGPU/fdiv_flags.f32.ll +++ b/llvm/test/CodeGen/AMDGPU/fdiv_flags.f32.ll @@ -1515,12 +1515,12 @@ define float @v_recip_sqrt_f32_ulp25(float %x) { ; CODEGEN-IEEE-GISEL: ; %bb.0: ; CODEGEN-IEEE-GISEL-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) ; CODEGEN-IEEE-GISEL-NEXT: v_mov_b32_e32 v1, 0x800000 -; CODEGEN-IEEE-GISEL-NEXT: v_cmp_lt_f32_e32 vcc, v0, v1 -; CODEGEN-IEEE-GISEL-NEXT: v_cndmask_b32_e64 v1, 0, 1, vcc +; CODEGEN-IEEE-GISEL-NEXT: v_cmp_ge_f32_e32 vcc, v0, v1 +; CODEGEN-IEEE-GISEL-NEXT: v_cndmask_b32_e64 v1, 1, 0, vcc ; CODEGEN-IEEE-GISEL-NEXT: v_lshlrev_b32_e32 v1, 5, v1 ; CODEGEN-IEEE-GISEL-NEXT: v_ldexp_f32_e32 v0, v0, v1 ; CODEGEN-IEEE-GISEL-NEXT: v_sqrt_f32_e32 v0, v0 -; CODEGEN-IEEE-GISEL-NEXT: v_cndmask_b32_e64 v1, 0, -16, vcc +; CODEGEN-IEEE-GISEL-NEXT: v_cndmask_b32_e64 v1, -16, 0, vcc ; CODEGEN-IEEE-GISEL-NEXT: v_ldexp_f32_e32 v0, v0, v1 ; CODEGEN-IEEE-GISEL-NEXT: v_frexp_mant_f32_e32 v1, v0 ; CODEGEN-IEEE-GISEL-NEXT: v_rcp_f32_e32 v1, v1 diff --git a/llvm/test/CodeGen/AMDGPU/fsqrt.f32.ll b/llvm/test/CodeGen/AMDGPU/fsqrt.f32.ll index 87c7cce854b11..c5d5b7c92d259 100644 --- a/llvm/test/CodeGen/AMDGPU/fsqrt.f32.ll +++ b/llvm/test/CodeGen/AMDGPU/fsqrt.f32.ll @@ -2326,12 +2326,12 @@ define float @v_sqrt_f32_ulp2_noncontractable_rcp(float %x) { ; GISEL-IEEE: ; %bb.0: ; GISEL-IEEE-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) ; GISEL-IEEE-NEXT: v_mov_b32_e32 v1, 0x800000 -; GISEL-IEEE-NEXT: v_cmp_lt_f32_e32 vcc, v0, v1 -; GISEL-IEEE-NEXT: v_cndmask_b32_e64 v1, 0, 1, vcc +; GISEL-IEEE-NEXT: v_cmp_ge_f32_e32 vcc, v0, v1 +; GISEL-IEEE-NEXT: v_cndmask_b32_e64 v1, 1, 0, vcc ; GISEL-IEEE-NEXT: v_lshlrev_b32_e32 v1, 5, v1 ; GISEL-IEEE-NEXT: v_ldexp_f32_e32 v0, v0, v1 ; GISEL-IEEE-NEXT: v_sqrt_f32_e32 v0, v0 -; GISEL-IEEE-NEXT: v_cndmask_b32_e64 v1, 0, -16, vcc +; GISEL-IEEE-NEXT: v_cndmask_b32_e64 v1, -16, 0, vcc ; GISEL-IEEE-NEXT: v_mov_b32_e32 v2, 0x7f800000 ; GISEL-IEEE-NEXT: v_ldexp_f32_e32 v0, v0, v1 ; GISEL-IEEE-NEXT: v_frexp_mant_f32_e32 v1, v0 @@ -2583,12 +2583,12 @@ define float @v_sqrt_f32_ulp2_contractable_fdiv_arcp(float %x, float %y) { ; 
GISEL-IEEE: ; %bb.0: ; GISEL-IEEE-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) ; GISEL-IEEE-NEXT: v_mov_b32_e32 v2, 0x800000 -; GISEL-IEEE-NEXT: v_cmp_lt_f32_e32 vcc, v0, v2 -; GISEL-IEEE-NEXT: v_cndmask_b32_e64 v2, 0, 1, vcc +; GISEL-IEEE-NEXT: v_cmp_ge_f32_e32 vcc, v0, v2 +; GISEL-IEEE-NEXT: v_cndmask_b32_e64 v2, 1, 0, vcc ; GISEL-IEEE-NEXT: v_lshlrev_b32_e32 v2, 5, v2 ; GISEL-IEEE-NEXT: v_ldexp_f32_e32 v0, v0, v2 ; GISEL-IEEE-NEXT: v_sqrt_f32_e32 v0, v0 -; GISEL-IEEE-NEXT: v_cndmask_b32_e64 v2, 0, -16, vcc +; GISEL-IEEE-NEXT: v_cndmask_b32_e64 v2, -16, 0, vcc ; GISEL-IEEE-NEXT: v_mov_b32_e32 v3, 0x7f800000 ; GISEL-IEEE-NEXT: v_ldexp_f32_e32 v0, v0, v2 ; GISEL-IEEE-NEXT: v_frexp_mant_f32_e32 v2, v0 @@ -3999,8 +3999,8 @@ define amdgpu_kernel void @elim_redun_check_neg0(ptr addrspace(1) %out, float %i ; GISEL-IEEE-NEXT: v_cndmask_b32_e32 v0, v1, v0, vcc ; GISEL-IEEE-NEXT: v_bfrev_b32_e32 v1, 1 ; GISEL-IEEE-NEXT: v_mov_b32_e32 v2, 0x7fc00000 -; GISEL-IEEE-NEXT: v_cmp_lt_f32_e32 vcc, s2, v1 -; GISEL-IEEE-NEXT: v_cndmask_b32_e32 v0, v0, v2, vcc +; GISEL-IEEE-NEXT: v_cmp_ge_f32_e32 vcc, s2, v1 +; GISEL-IEEE-NEXT: v_cndmask_b32_e32 v0, v2, v0, vcc ; GISEL-IEEE-NEXT: buffer_store_dword v0, off, s[4:7], 0 ; GISEL-IEEE-NEXT: s_endpgm ; @@ -4061,8 +4061,8 @@ define amdgpu_kernel void @elim_redun_check_neg0(ptr addrspace(1) %out, float %i ; GISEL-DAZ-NEXT: v_cndmask_b32_e32 v0, v1, v0, vcc ; GISEL-DAZ-NEXT: v_bfrev_b32_e32 v1, 1 ; GISEL-DAZ-NEXT: v_mov_b32_e32 v2, 0x7fc00000 -; GISEL-DAZ-NEXT: v_cmp_lt_f32_e32 vcc, s2, v1 -; GISEL-DAZ-NEXT: v_cndmask_b32_e32 v0, v0, v2, vcc +; GISEL-DAZ-NEXT: v_cmp_ge_f32_e32 vcc, s2, v1 +; GISEL-DAZ-NEXT: v_cndmask_b32_e32 v0, v2, v0, vcc ; GISEL-DAZ-NEXT: s_mov_b32 s2, -1 ; GISEL-DAZ-NEXT: buffer_store_dword v0, off, s[0:3], 0 ; GISEL-DAZ-NEXT: s_endpgm @@ -4133,8 +4133,8 @@ define amdgpu_kernel void @elim_redun_check_pos0(ptr addrspace(1) %out, float %i ; GISEL-IEEE-NEXT: v_cmp_class_f32_e32 vcc, v0, v2 ; GISEL-IEEE-NEXT: v_cndmask_b32_e32 v0, v1, v0, vcc ; GISEL-IEEE-NEXT: v_mov_b32_e32 v1, 0x7fc00000 -; GISEL-IEEE-NEXT: v_cmp_lt_f32_e64 vcc, s2, 0 -; GISEL-IEEE-NEXT: v_cndmask_b32_e32 v0, v0, v1, vcc +; GISEL-IEEE-NEXT: v_cmp_ge_f32_e64 vcc, s2, 0 +; GISEL-IEEE-NEXT: v_cndmask_b32_e32 v0, v1, v0, vcc ; GISEL-IEEE-NEXT: buffer_store_dword v0, off, s[4:7], 0 ; GISEL-IEEE-NEXT: s_endpgm ; @@ -4194,8 +4194,8 @@ define amdgpu_kernel void @elim_redun_check_pos0(ptr addrspace(1) %out, float %i ; GISEL-DAZ-NEXT: v_cmp_class_f32_e32 vcc, v0, v2 ; GISEL-DAZ-NEXT: v_cndmask_b32_e32 v0, v1, v0, vcc ; GISEL-DAZ-NEXT: v_mov_b32_e32 v1, 0x7fc00000 -; GISEL-DAZ-NEXT: v_cmp_lt_f32_e64 vcc, s2, 0 -; GISEL-DAZ-NEXT: v_cndmask_b32_e32 v0, v0, v1, vcc +; GISEL-DAZ-NEXT: v_cmp_ge_f32_e64 vcc, s2, 0 +; GISEL-DAZ-NEXT: v_cndmask_b32_e32 v0, v1, v0, vcc ; GISEL-DAZ-NEXT: s_mov_b32 s2, -1 ; GISEL-DAZ-NEXT: buffer_store_dword v0, off, s[0:3], 0 ; GISEL-DAZ-NEXT: s_endpgm @@ -4434,10 +4434,10 @@ define amdgpu_kernel void @elim_redun_check_v2(ptr addrspace(1) %out, <2 x float ; GISEL-IEEE-NEXT: v_bfrev_b32_e32 v3, 1 ; GISEL-IEEE-NEXT: v_cndmask_b32_e32 v1, v1, v0, vcc ; GISEL-IEEE-NEXT: v_mov_b32_e32 v4, 0x7fc00000 -; GISEL-IEEE-NEXT: v_cmp_lt_f32_e32 vcc, s6, v3 -; GISEL-IEEE-NEXT: v_cndmask_b32_e32 v0, v2, v4, vcc -; GISEL-IEEE-NEXT: v_cmp_lt_f32_e32 vcc, s7, v3 -; GISEL-IEEE-NEXT: v_cndmask_b32_e32 v1, v1, v4, vcc +; GISEL-IEEE-NEXT: v_cmp_ge_f32_e32 vcc, s6, v3 +; GISEL-IEEE-NEXT: v_cndmask_b32_e32 v0, v4, v2, vcc +; GISEL-IEEE-NEXT: v_cmp_ge_f32_e32 vcc, s7, v3 +; GISEL-IEEE-NEXT: 
v_cndmask_b32_e32 v1, v4, v1, vcc ; GISEL-IEEE-NEXT: s_mov_b32 s6, -1 ; GISEL-IEEE-NEXT: s_mov_b32 s7, 0xf000 ; GISEL-IEEE-NEXT: buffer_store_dwordx2 v[0:1], off, s[4:7], 0 @@ -4530,10 +4530,10 @@ define amdgpu_kernel void @elim_redun_check_v2(ptr addrspace(1) %out, <2 x float ; GISEL-DAZ-NEXT: v_bfrev_b32_e32 v3, 1 ; GISEL-DAZ-NEXT: v_cndmask_b32_e32 v1, v1, v0, vcc ; GISEL-DAZ-NEXT: v_mov_b32_e32 v4, 0x7fc00000 -; GISEL-DAZ-NEXT: v_cmp_lt_f32_e32 vcc, s6, v3 -; GISEL-DAZ-NEXT: v_cndmask_b32_e32 v0, v2, v4, vcc -; GISEL-DAZ-NEXT: v_cmp_lt_f32_e32 vcc, s7, v3 -; GISEL-DAZ-NEXT: v_cndmask_b32_e32 v1, v1, v4, vcc +; GISEL-DAZ-NEXT: v_cmp_ge_f32_e32 vcc, s6, v3 +; GISEL-DAZ-NEXT: v_cndmask_b32_e32 v0, v4, v2, vcc +; GISEL-DAZ-NEXT: v_cmp_ge_f32_e32 vcc, s7, v3 +; GISEL-DAZ-NEXT: v_cndmask_b32_e32 v1, v4, v1, vcc ; GISEL-DAZ-NEXT: s_mov_b32 s6, -1 ; GISEL-DAZ-NEXT: s_mov_b32 s7, 0xf000 ; GISEL-DAZ-NEXT: buffer_store_dwordx2 v[0:1], off, s[4:7], 0 diff --git a/llvm/test/CodeGen/AMDGPU/llvm.exp.ll b/llvm/test/CodeGen/AMDGPU/llvm.exp.ll index fdccacf372dfa..1b98e0b6674f4 100644 --- a/llvm/test/CodeGen/AMDGPU/llvm.exp.ll +++ b/llvm/test/CodeGen/AMDGPU/llvm.exp.ll @@ -69,11 +69,11 @@ define amdgpu_kernel void @s_exp_f32(ptr addrspace(1) %out, float %in) { ; VI-GISEL-NEXT: v_mov_b32_e32 v2, 0x7f800000 ; VI-GISEL-NEXT: v_ldexp_f32 v0, v0, v1 ; VI-GISEL-NEXT: v_mov_b32_e32 v1, 0xc2ce8ed0 -; VI-GISEL-NEXT: v_cmp_lt_f32_e32 vcc, s2, v1 +; VI-GISEL-NEXT: v_cmp_ge_f32_e32 vcc, s2, v1 ; VI-GISEL-NEXT: v_mov_b32_e32 v1, 0x42b17218 -; VI-GISEL-NEXT: v_cndmask_b32_e64 v0, v0, 0, vcc -; VI-GISEL-NEXT: v_cmp_gt_f32_e32 vcc, s2, v1 -; VI-GISEL-NEXT: v_cndmask_b32_e32 v2, v0, v2, vcc +; VI-GISEL-NEXT: v_cndmask_b32_e32 v0, 0, v0, vcc +; VI-GISEL-NEXT: v_cmp_le_f32_e32 vcc, s2, v1 +; VI-GISEL-NEXT: v_cndmask_b32_e32 v2, v2, v0, vcc ; VI-GISEL-NEXT: s_waitcnt lgkmcnt(0) ; VI-GISEL-NEXT: v_mov_b32_e32 v0, s0 ; VI-GISEL-NEXT: v_mov_b32_e32 v1, s1 @@ -123,13 +123,13 @@ define amdgpu_kernel void @s_exp_f32(ptr addrspace(1) %out, float %in) { ; GFX900-GISEL-NEXT: v_cvt_i32_f32_e32 v1, v3 ; GFX900-GISEL-NEXT: v_exp_f32_e32 v0, v0 ; GFX900-GISEL-NEXT: v_mov_b32_e32 v2, 0xc2ce8ed0 -; GFX900-GISEL-NEXT: v_cmp_lt_f32_e32 vcc, s2, v2 +; GFX900-GISEL-NEXT: v_cmp_ge_f32_e32 vcc, s2, v2 ; GFX900-GISEL-NEXT: v_mov_b32_e32 v2, 0x7f800000 ; GFX900-GISEL-NEXT: v_ldexp_f32 v0, v0, v1 ; GFX900-GISEL-NEXT: v_mov_b32_e32 v1, 0x42b17218 -; GFX900-GISEL-NEXT: v_cndmask_b32_e64 v0, v0, 0, vcc -; GFX900-GISEL-NEXT: v_cmp_gt_f32_e32 vcc, s2, v1 -; GFX900-GISEL-NEXT: v_cndmask_b32_e32 v0, v0, v2, vcc +; GFX900-GISEL-NEXT: v_cndmask_b32_e32 v0, 0, v0, vcc +; GFX900-GISEL-NEXT: v_cmp_le_f32_e32 vcc, s2, v1 +; GFX900-GISEL-NEXT: v_cndmask_b32_e32 v0, v2, v0, vcc ; GFX900-GISEL-NEXT: v_mov_b32_e32 v1, 0 ; GFX900-GISEL-NEXT: global_store_dword v1, v0, s[0:1] ; GFX900-GISEL-NEXT: s_endpgm @@ -179,13 +179,13 @@ define amdgpu_kernel void @s_exp_f32(ptr addrspace(1) %out, float %in) { ; SI-GISEL-NEXT: v_cvt_i32_f32_e32 v1, v3 ; SI-GISEL-NEXT: v_exp_f32_e32 v0, v0 ; SI-GISEL-NEXT: v_mov_b32_e32 v2, 0xc2ce8ed0 -; SI-GISEL-NEXT: v_cmp_lt_f32_e32 vcc, s2, v2 +; SI-GISEL-NEXT: v_cmp_ge_f32_e32 vcc, s2, v2 ; SI-GISEL-NEXT: v_mov_b32_e32 v2, 0x7f800000 ; SI-GISEL-NEXT: v_ldexp_f32_e32 v0, v0, v1 ; SI-GISEL-NEXT: v_mov_b32_e32 v1, 0x42b17218 -; SI-GISEL-NEXT: v_cndmask_b32_e64 v0, v0, 0, vcc -; SI-GISEL-NEXT: v_cmp_gt_f32_e32 vcc, s2, v1 -; SI-GISEL-NEXT: v_cndmask_b32_e32 v0, v0, v2, vcc +; SI-GISEL-NEXT: v_cndmask_b32_e32 v0, 0, v0, vcc +; SI-GISEL-NEXT: 
v_cmp_le_f32_e32 vcc, s2, v1
+; SI-GISEL-NEXT: v_cndmask_b32_e32 v0, v2, v0, vcc
 ; SI-GISEL-NEXT: s_mov_b32 s2, -1
 ; SI-GISEL-NEXT: buffer_store_dword v0, off, s[0:3], 0
 ; SI-GISEL-NEXT: s_endpgm
@@ -424,18 +424,18 @@ define amdgpu_kernel void @s_exp_v2f32(ptr addrspace(1) %out, <2 x float> %in) {
 ; VI-GISEL-NEXT: v_exp_f32_e32 v5, v0
 ; VI-GISEL-NEXT: v_ldexp_f32 v2, v2, v3
 ; VI-GISEL-NEXT: v_mov_b32_e32 v3, 0xc2ce8ed0
-; VI-GISEL-NEXT: v_cmp_lt_f32_e32 vcc, s2, v3
+; VI-GISEL-NEXT: v_cmp_ge_f32_e32 vcc, s2, v3
 ; VI-GISEL-NEXT: v_mov_b32_e32 v4, 0x42b17218
-; VI-GISEL-NEXT: v_cndmask_b32_e64 v2, v2, 0, vcc
+; VI-GISEL-NEXT: v_cndmask_b32_e32 v2, 0, v2, vcc
 ; VI-GISEL-NEXT: v_mov_b32_e32 v6, 0x7f800000
-; VI-GISEL-NEXT: v_cmp_gt_f32_e32 vcc, s2, v4
-; VI-GISEL-NEXT: v_cndmask_b32_e32 v0, v2, v6, vcc
+; VI-GISEL-NEXT: v_cmp_le_f32_e32 vcc, s2, v4
+; VI-GISEL-NEXT: v_cndmask_b32_e32 v0, v6, v2, vcc
 ; VI-GISEL-NEXT: v_ldexp_f32 v1, v5, v1
-; VI-GISEL-NEXT: v_cmp_lt_f32_e32 vcc, s3, v3
-; VI-GISEL-NEXT: v_cndmask_b32_e64 v1, v1, 0, vcc
-; VI-GISEL-NEXT: v_cmp_gt_f32_e32 vcc, s3, v4
+; VI-GISEL-NEXT: v_cmp_ge_f32_e32 vcc, s3, v3
+; VI-GISEL-NEXT: v_cndmask_b32_e32 v1, 0, v1, vcc
+; VI-GISEL-NEXT: v_cmp_le_f32_e32 vcc, s3, v4
 ; VI-GISEL-NEXT: v_mov_b32_e32 v3, s1
-; VI-GISEL-NEXT: v_cndmask_b32_e32 v1, v1, v6, vcc
+; VI-GISEL-NEXT: v_cndmask_b32_e32 v1, v6, v1, vcc
 ; VI-GISEL-NEXT: v_mov_b32_e32 v2, s0
 ; VI-GISEL-NEXT: flat_store_dwordx2 v[2:3], v[0:1]
 ; VI-GISEL-NEXT: s_endpgm
@@ -504,16 +504,16 @@ define amdgpu_kernel void @s_exp_v2f32(ptr addrspace(1) %out, <2 x float> %in) {
 ; GFX900-GISEL-NEXT: v_exp_f32_e32 v5, v0
 ; GFX900-GISEL-NEXT: v_mov_b32_e32 v4, 0xc2ce8ed0
 ; GFX900-GISEL-NEXT: v_ldexp_f32 v2, v2, v3
-; GFX900-GISEL-NEXT: v_cmp_lt_f32_e32 vcc, s2, v4
+; GFX900-GISEL-NEXT: v_cmp_ge_f32_e32 vcc, s2, v4
 ; GFX900-GISEL-NEXT: v_mov_b32_e32 v3, 0x42b17218
-; GFX900-GISEL-NEXT: v_cndmask_b32_e64 v2, v2, 0, vcc
-; GFX900-GISEL-NEXT: v_cmp_gt_f32_e32 vcc, s2, v3
-; GFX900-GISEL-NEXT: v_cndmask_b32_e32 v0, v2, v6, vcc
+; GFX900-GISEL-NEXT: v_cndmask_b32_e32 v2, 0, v2, vcc
+; GFX900-GISEL-NEXT: v_cmp_le_f32_e32 vcc, s2, v3
+; GFX900-GISEL-NEXT: v_cndmask_b32_e32 v0, v6, v2, vcc
 ; GFX900-GISEL-NEXT: v_ldexp_f32 v1, v5, v1
-; GFX900-GISEL-NEXT: v_cmp_lt_f32_e32 vcc, s3, v4
-; GFX900-GISEL-NEXT: v_cndmask_b32_e64 v1, v1, 0, vcc
-; GFX900-GISEL-NEXT: v_cmp_gt_f32_e32 vcc, s3, v3
-; GFX900-GISEL-NEXT: v_cndmask_b32_e32 v1, v1, v6, vcc
+; GFX900-GISEL-NEXT: v_cmp_ge_f32_e32 vcc, s3, v4
+; GFX900-GISEL-NEXT: v_cndmask_b32_e32 v1, 0, v1, vcc
+; GFX900-GISEL-NEXT: v_cmp_le_f32_e32 vcc, s3, v3
+; GFX900-GISEL-NEXT: v_cndmask_b32_e32 v1, v6, v1, vcc
 ; GFX900-GISEL-NEXT: v_mov_b32_e32 v2, 0
 ; GFX900-GISEL-NEXT: global_store_dwordx2 v2, v[0:1], s[0:1]
 ; GFX900-GISEL-NEXT: s_endpgm
@@ -585,16 +585,16 @@ define amdgpu_kernel void @s_exp_v2f32(ptr addrspace(1) %out, <2 x float> %in) {
 ; SI-GISEL-NEXT: v_exp_f32_e32 v5, v0
 ; SI-GISEL-NEXT: v_mov_b32_e32 v4, 0xc2ce8ed0
 ; SI-GISEL-NEXT: v_ldexp_f32_e32 v2, v2, v3
-; SI-GISEL-NEXT: v_cmp_lt_f32_e32 vcc, s2, v4
+; SI-GISEL-NEXT: v_cmp_ge_f32_e32 vcc, s2, v4
 ; SI-GISEL-NEXT: v_mov_b32_e32 v3, 0x42b17218
-; SI-GISEL-NEXT: v_cndmask_b32_e64 v2, v2, 0, vcc
-; SI-GISEL-NEXT: v_cmp_gt_f32_e32 vcc, s2, v3
-; SI-GISEL-NEXT: v_cndmask_b32_e32 v0, v2, v6, vcc
+; SI-GISEL-NEXT: v_cndmask_b32_e32 v2, 0, v2, vcc
+; SI-GISEL-NEXT: v_cmp_le_f32_e32 vcc, s2, v3
+; SI-GISEL-NEXT: v_cndmask_b32_e32 v0, v6, v2, vcc
 ; SI-GISEL-NEXT: v_ldexp_f32_e32 v1, v5, v1
-; SI-GISEL-NEXT: v_cmp_lt_f32_e32 vcc, s3, v4
-; SI-GISEL-NEXT: v_cndmask_b32_e64 v1, v1, 0, vcc
-; SI-GISEL-NEXT: v_cmp_gt_f32_e32 vcc, s3, v3
-; SI-GISEL-NEXT: v_cndmask_b32_e32 v1, v1, v6, vcc
+; SI-GISEL-NEXT: v_cmp_ge_f32_e32 vcc, s3, v4
+; SI-GISEL-NEXT: v_cndmask_b32_e32 v1, 0, v1, vcc
+; SI-GISEL-NEXT: v_cmp_le_f32_e32 vcc, s3, v3
+; SI-GISEL-NEXT: v_cndmask_b32_e32 v1, v6, v1, vcc
 ; SI-GISEL-NEXT: s_mov_b32 s2, -1
 ; SI-GISEL-NEXT: s_mov_b32 s3, 0xf000
 ; SI-GISEL-NEXT: buffer_store_dwordx2 v[0:1], off, s[0:3], 0
@@ -960,10 +960,10 @@ define amdgpu_kernel void @s_exp_v3f32(ptr addrspace(1) %out, <3 x float> %in) {
 ; VI-GISEL-NEXT: v_exp_f32_e32 v5, v5
 ; VI-GISEL-NEXT: v_ldexp_f32 v0, v0, v3
 ; VI-GISEL-NEXT: v_mov_b32_e32 v3, 0xc2ce8ed0
-; VI-GISEL-NEXT: v_cmp_lt_f32_e32 vcc, s0, v3
+; VI-GISEL-NEXT: v_cmp_ge_f32_e32 vcc, s0, v3
 ; VI-GISEL-NEXT: v_mov_b32_e32 v4, 0x42b17218
-; VI-GISEL-NEXT: v_cndmask_b32_e64 v0, v0, 0, vcc
-; VI-GISEL-NEXT: v_cmp_gt_f32_e32 vcc, s0, v4
+; VI-GISEL-NEXT: v_cndmask_b32_e32 v0, 0, v0, vcc
+; VI-GISEL-NEXT: v_cmp_le_f32_e32 vcc, s0, v4
 ; VI-GISEL-NEXT: s_and_b32 s0, s2, 0xfffff000
 ; VI-GISEL-NEXT: v_ldexp_f32 v5, v5, v6
 ; VI-GISEL-NEXT: v_mov_b32_e32 v6, s0
@@ -980,17 +980,17 @@ define amdgpu_kernel void @s_exp_v3f32(ptr addrspace(1) %out, <3 x float> %in) {
 ; VI-GISEL-NEXT: v_cvt_i32_f32_e32 v2, v6
 ; VI-GISEL-NEXT: v_exp_f32_e32 v6, v1
 ; VI-GISEL-NEXT: v_mov_b32_e32 v7, 0x7f800000
-; VI-GISEL-NEXT: v_cndmask_b32_e32 v0, v0, v7, vcc
-; VI-GISEL-NEXT: v_cmp_lt_f32_e32 vcc, s1, v3
-; VI-GISEL-NEXT: v_cndmask_b32_e64 v1, v5, 0, vcc
-; VI-GISEL-NEXT: v_cmp_gt_f32_e32 vcc, s1, v4
-; VI-GISEL-NEXT: v_cndmask_b32_e32 v1, v1, v7, vcc
+; VI-GISEL-NEXT: v_cndmask_b32_e32 v0, v7, v0, vcc
+; VI-GISEL-NEXT: v_cmp_ge_f32_e32 vcc, s1, v3
+; VI-GISEL-NEXT: v_cndmask_b32_e32 v1, 0, v5, vcc
+; VI-GISEL-NEXT: v_cmp_le_f32_e32 vcc, s1, v4
+; VI-GISEL-NEXT: v_cndmask_b32_e32 v1, v7, v1, vcc
 ; VI-GISEL-NEXT: v_ldexp_f32 v2, v6, v2
-; VI-GISEL-NEXT: v_cmp_lt_f32_e32 vcc, s2, v3
-; VI-GISEL-NEXT: v_cndmask_b32_e64 v2, v2, 0, vcc
-; VI-GISEL-NEXT: v_cmp_gt_f32_e32 vcc, s2, v4
+; VI-GISEL-NEXT: v_cmp_ge_f32_e32 vcc, s2, v3
+; VI-GISEL-NEXT: v_cndmask_b32_e32 v2, 0, v2, vcc
+; VI-GISEL-NEXT: v_cmp_le_f32_e32 vcc, s2, v4
 ; VI-GISEL-NEXT: v_mov_b32_e32 v3, s4
-; VI-GISEL-NEXT: v_cndmask_b32_e32 v2, v2, v7, vcc
+; VI-GISEL-NEXT: v_cndmask_b32_e32 v2, v7, v2, vcc
 ; VI-GISEL-NEXT: v_mov_b32_e32 v4, s5
 ; VI-GISEL-NEXT: flat_store_dwordx3 v[3:4], v[0:2]
 ; VI-GISEL-NEXT: s_endpgm
@@ -1080,23 +1080,23 @@ define amdgpu_kernel void @s_exp_v3f32(ptr addrspace(1) %out, <3 x float> %in) {
 ; GFX900-GISEL-NEXT: v_mov_b32_e32 v4, 0xc2ce8ed0
 ; GFX900-GISEL-NEXT: v_add_f32_e32 v1, v6, v1
 ; GFX900-GISEL-NEXT: v_ldexp_f32 v0, v0, v3
-; GFX900-GISEL-NEXT: v_cmp_lt_f32_e32 vcc, s0, v4
+; GFX900-GISEL-NEXT: v_cmp_ge_f32_e32 vcc, s0, v4
 ; GFX900-GISEL-NEXT: v_mov_b32_e32 v3, 0x42b17218
 ; GFX900-GISEL-NEXT: v_cvt_i32_f32_e32 v2, v2
 ; GFX900-GISEL-NEXT: v_exp_f32_e32 v6, v1
-; GFX900-GISEL-NEXT: v_cndmask_b32_e64 v0, v0, 0, vcc
+; GFX900-GISEL-NEXT: v_cndmask_b32_e32 v0, 0, v0, vcc
 ; GFX900-GISEL-NEXT: v_mov_b32_e32 v7, 0x7f800000
-; GFX900-GISEL-NEXT: v_cmp_gt_f32_e32 vcc, s0, v3
-; GFX900-GISEL-NEXT: v_cndmask_b32_e32 v0, v0, v7, vcc
-; GFX900-GISEL-NEXT: v_cmp_lt_f32_e32 vcc, s1, v4
-; GFX900-GISEL-NEXT: v_cndmask_b32_e64 v1, v5, 0, vcc
-; GFX900-GISEL-NEXT: v_cmp_gt_f32_e32 vcc, s1, v3
-; GFX900-GISEL-NEXT: v_cndmask_b32_e32 v1, v1, v7, vcc
+; GFX900-GISEL-NEXT: v_cmp_le_f32_e32 vcc, s0, v3
+; GFX900-GISEL-NEXT: v_cndmask_b32_e32 v0, v7, v0, vcc
+; GFX900-GISEL-NEXT: v_cmp_ge_f32_e32 vcc, s1, v4
+; GFX900-GISEL-NEXT: v_cndmask_b32_e32 v1, 0, v5, vcc
+; GFX900-GISEL-NEXT: v_cmp_le_f32_e32 vcc, s1, v3
+; GFX900-GISEL-NEXT: v_cndmask_b32_e32 v1, v7, v1, vcc
 ; GFX900-GISEL-NEXT: v_ldexp_f32 v2, v6, v2
-; GFX900-GISEL-NEXT: v_cmp_lt_f32_e32 vcc, s2, v4
-; GFX900-GISEL-NEXT: v_cndmask_b32_e64 v2, v2, 0, vcc
-; GFX900-GISEL-NEXT: v_cmp_gt_f32_e32 vcc, s2, v3
-; GFX900-GISEL-NEXT: v_cndmask_b32_e32 v2, v2, v7, vcc
+; GFX900-GISEL-NEXT: v_cmp_ge_f32_e32 vcc, s2, v4
+; GFX900-GISEL-NEXT: v_cndmask_b32_e32 v2, 0, v2, vcc
+; GFX900-GISEL-NEXT: v_cmp_le_f32_e32 vcc, s2, v3
+; GFX900-GISEL-NEXT: v_cndmask_b32_e32 v2, v7, v2, vcc
 ; GFX900-GISEL-NEXT: v_mov_b32_e32 v3, 0
 ; GFX900-GISEL-NEXT: global_store_dwordx3 v3, v[0:2], s[6:7]
 ; GFX900-GISEL-NEXT: s_endpgm
@@ -1189,24 +1189,24 @@ define amdgpu_kernel void @s_exp_v3f32(ptr addrspace(1) %out, <3 x float> %in) {
 ; SI-GISEL-NEXT: v_mov_b32_e32 v4, 0xc2ce8ed0
 ; SI-GISEL-NEXT: v_add_f32_e32 v1, v6, v1
 ; SI-GISEL-NEXT: v_ldexp_f32_e32 v0, v0, v3
-; SI-GISEL-NEXT: v_cmp_lt_f32_e32 vcc, s0, v4
+; SI-GISEL-NEXT: v_cmp_ge_f32_e32 vcc, s0, v4
 ; SI-GISEL-NEXT: v_mov_b32_e32 v3, 0x42b17218
 ; SI-GISEL-NEXT: v_cvt_i32_f32_e32 v2, v2
 ; SI-GISEL-NEXT: v_exp_f32_e32 v6, v1
-; SI-GISEL-NEXT: v_cndmask_b32_e64 v0, v0, 0, vcc
+; SI-GISEL-NEXT: v_cndmask_b32_e32 v0, 0, v0, vcc
 ; SI-GISEL-NEXT: v_mov_b32_e32 v7, 0x7f800000
-; SI-GISEL-NEXT: v_cmp_gt_f32_e32 vcc, s0, v3
-; SI-GISEL-NEXT: v_cndmask_b32_e32 v0, v0, v7, vcc
-; SI-GISEL-NEXT: v_cmp_lt_f32_e32 vcc, s1, v4
-; SI-GISEL-NEXT: v_cndmask_b32_e64 v1, v5, 0, vcc
-; SI-GISEL-NEXT: v_cmp_gt_f32_e32 vcc, s1, v3
-; SI-GISEL-NEXT: v_cndmask_b32_e32 v1, v1, v7, vcc
+; SI-GISEL-NEXT: v_cmp_le_f32_e32 vcc, s0, v3
+; SI-GISEL-NEXT: v_cndmask_b32_e32 v0, v7, v0, vcc
+; SI-GISEL-NEXT: v_cmp_ge_f32_e32 vcc, s1, v4
+; SI-GISEL-NEXT: v_cndmask_b32_e32 v1, 0, v5, vcc
+; SI-GISEL-NEXT: v_cmp_le_f32_e32 vcc, s1, v3
+; SI-GISEL-NEXT: v_cndmask_b32_e32 v1, v7, v1, vcc
 ; SI-GISEL-NEXT: v_ldexp_f32_e32 v2, v6, v2
-; SI-GISEL-NEXT: v_cmp_lt_f32_e32 vcc, s2, v4
-; SI-GISEL-NEXT: v_cndmask_b32_e64 v2, v2, 0, vcc
-; SI-GISEL-NEXT: v_cmp_gt_f32_e32 vcc, s2, v3
+; SI-GISEL-NEXT: v_cmp_ge_f32_e32 vcc, s2, v4
+; SI-GISEL-NEXT: v_cndmask_b32_e32 v2, 0, v2, vcc
+; SI-GISEL-NEXT: v_cmp_le_f32_e32 vcc, s2, v3
 ; SI-GISEL-NEXT: s_mov_b32 s7, 0xf000
-; SI-GISEL-NEXT: v_cndmask_b32_e32 v2, v2, v7, vcc
+; SI-GISEL-NEXT: v_cndmask_b32_e32 v2, v7, v2, vcc
 ; SI-GISEL-NEXT: buffer_store_dwordx2 v[0:1], off, s[4:7], 0
 ; SI-GISEL-NEXT: buffer_store_dword v2, off, s[4:7], 0 offset:8
 ; SI-GISEL-NEXT: s_endpgm
@@ -1719,9 +1719,9 @@ define amdgpu_kernel void @s_exp_v4f32(ptr addrspace(1) %out, <4 x float> %in) {
 ; VI-GISEL-NEXT: v_add_f32_e32 v1, v6, v1
 ; VI-GISEL-NEXT: v_cvt_i32_f32_e32 v6, v7
 ; VI-GISEL-NEXT: v_exp_f32_e32 v1, v1
-; VI-GISEL-NEXT: v_cmp_lt_f32_e32 vcc, s0, v4
-; VI-GISEL-NEXT: v_cndmask_b32_e64 v0, v0, 0, vcc
-; VI-GISEL-NEXT: v_cmp_gt_f32_e32 vcc, s0, v5
+; VI-GISEL-NEXT: v_cmp_ge_f32_e32 vcc, s0, v4
+; VI-GISEL-NEXT: v_cndmask_b32_e32 v0, 0, v0, vcc
+; VI-GISEL-NEXT: v_cmp_le_f32_e32 vcc, s0, v5
 ; VI-GISEL-NEXT: s_and_b32 s0, s2, 0xfffff000
 ; VI-GISEL-NEXT: v_ldexp_f32 v1, v1, v6
 ; VI-GISEL-NEXT: v_mov_b32_e32 v6, s0
@@ -1751,23 +1751,23 @@ define amdgpu_kernel void @s_exp_v4f32(ptr addrspace(1) %out, <4 x float> %in) {
 ; VI-GISEL-NEXT: v_sub_f32_e32 v2, v2, v8
 ; VI-GISEL-NEXT: v_mov_b32_e32 v7, 0x7f800000
 ; VI-GISEL-NEXT: v_add_f32_e32 v2, v2, v3
-; VI-GISEL-NEXT: v_cndmask_b32_e32 v0, v0, v7, vcc
-; VI-GISEL-NEXT: v_cmp_lt_f32_e32 vcc, s1, v4
+; VI-GISEL-NEXT: v_cndmask_b32_e32 v0, v7, v0, vcc
+; VI-GISEL-NEXT: v_cmp_ge_f32_e32 vcc, s1, v4
 ; VI-GISEL-NEXT: v_cvt_i32_f32_e32 v3, v8
 ; VI-GISEL-NEXT: v_exp_f32_e32 v8, v2
-; VI-GISEL-NEXT: v_cndmask_b32_e64 v1, v1, 0, vcc
-; VI-GISEL-NEXT: v_cmp_gt_f32_e32 vcc, s1, v5
-; VI-GISEL-NEXT: v_cndmask_b32_e32 v1, v1, v7, vcc
-; VI-GISEL-NEXT: v_cmp_lt_f32_e32 vcc, s2, v4
-; VI-GISEL-NEXT: v_cndmask_b32_e64 v2, v6, 0, vcc
-; VI-GISEL-NEXT: v_cmp_gt_f32_e32 vcc, s2, v5
-; VI-GISEL-NEXT: v_cndmask_b32_e32 v2, v2, v7, vcc
+; VI-GISEL-NEXT: v_cndmask_b32_e32 v1, 0, v1, vcc
+; VI-GISEL-NEXT: v_cmp_le_f32_e32 vcc, s1, v5
+; VI-GISEL-NEXT: v_cndmask_b32_e32 v1, v7, v1, vcc
+; VI-GISEL-NEXT: v_cmp_ge_f32_e32 vcc, s2, v4
+; VI-GISEL-NEXT: v_cndmask_b32_e32 v2, 0, v6, vcc
+; VI-GISEL-NEXT: v_cmp_le_f32_e32 vcc, s2, v5
+; VI-GISEL-NEXT: v_cndmask_b32_e32 v2, v7, v2, vcc
 ; VI-GISEL-NEXT: v_ldexp_f32 v3, v8, v3
-; VI-GISEL-NEXT: v_cmp_lt_f32_e32 vcc, s3, v4
-; VI-GISEL-NEXT: v_cndmask_b32_e64 v3, v3, 0, vcc
-; VI-GISEL-NEXT: v_cmp_gt_f32_e32 vcc, s3, v5
+; VI-GISEL-NEXT: v_cmp_ge_f32_e32 vcc, s3, v4
+; VI-GISEL-NEXT: v_cndmask_b32_e32 v3, 0, v3, vcc
+; VI-GISEL-NEXT: v_cmp_le_f32_e32 vcc, s3, v5
 ; VI-GISEL-NEXT: v_mov_b32_e32 v4, s4
-; VI-GISEL-NEXT: v_cndmask_b32_e32 v3, v3, v7, vcc
+; VI-GISEL-NEXT: v_cndmask_b32_e32 v3, v7, v3, vcc
 ; VI-GISEL-NEXT: v_mov_b32_e32 v5, s5
 ; VI-GISEL-NEXT: flat_store_dwordx4 v[4:5], v[0:3]
 ; VI-GISEL-NEXT: s_endpgm
@@ -1855,7 +1855,7 @@ define amdgpu_kernel void @s_exp_v4f32(ptr addrspace(1) %out, <4 x float> %in) {
 ; GFX900-GISEL-NEXT: v_cvt_i32_f32_e32 v1, v4
 ; GFX900-GISEL-NEXT: v_exp_f32_e32 v0, v0
 ; GFX900-GISEL-NEXT: v_mov_b32_e32 v4, 0xc2ce8ed0
-; GFX900-GISEL-NEXT: v_cmp_lt_f32_e32 vcc, s0, v4
+; GFX900-GISEL-NEXT: v_cmp_ge_f32_e32 vcc, s0, v4
 ; GFX900-GISEL-NEXT: v_ldexp_f32 v0, v0, v1
 ; GFX900-GISEL-NEXT: v_mul_f32_e32 v1, s1, v2
 ; GFX900-GISEL-NEXT: v_fma_f32 v6, s1, v2, -v1
@@ -1865,9 +1865,9 @@ define amdgpu_kernel void @s_exp_v4f32(ptr addrspace(1) %out, <4 x float> %in) {
 ; GFX900-GISEL-NEXT: v_add_f32_e32 v1, v1, v6
 ; GFX900-GISEL-NEXT: v_cvt_i32_f32_e32 v6, v7
 ; GFX900-GISEL-NEXT: v_exp_f32_e32 v1, v1
-; GFX900-GISEL-NEXT: v_cndmask_b32_e64 v0, v0, 0, vcc
+; GFX900-GISEL-NEXT: v_cndmask_b32_e32 v0, 0, v0, vcc
 ; GFX900-GISEL-NEXT: v_mov_b32_e32 v7, 0x7f800000
-; GFX900-GISEL-NEXT: v_cmp_gt_f32_e32 vcc, s0, v5
+; GFX900-GISEL-NEXT: v_cmp_le_f32_e32 vcc, s0, v5
 ; GFX900-GISEL-NEXT: v_ldexp_f32 v1, v1, v6
 ; GFX900-GISEL-NEXT: v_mul_f32_e32 v6, s2, v2
 ; GFX900-GISEL-NEXT: v_fma_f32 v8, s2, v2, -v6
@@ -1877,9 +1877,9 @@ define amdgpu_kernel void @s_exp_v4f32(ptr addrspace(1) %out, <4 x float> %in) {
 ; GFX900-GISEL-NEXT: v_add_f32_e32 v6, v6, v8
 ; GFX900-GISEL-NEXT: v_cvt_i32_f32_e32 v8, v9
 ; GFX900-GISEL-NEXT: v_exp_f32_e32 v6, v6
-; GFX900-GISEL-NEXT: v_cndmask_b32_e32 v0, v0, v7, vcc
-; GFX900-GISEL-NEXT: v_cmp_lt_f32_e32 vcc, s1, v4
-; GFX900-GISEL-NEXT: v_cndmask_b32_e64 v1, v1, 0, vcc
+; GFX900-GISEL-NEXT: v_cndmask_b32_e32 v0, v7, v0, vcc
+; GFX900-GISEL-NEXT: v_cmp_ge_f32_e32 vcc, s1, v4
+; GFX900-GISEL-NEXT: v_cndmask_b32_e32 v1, 0, v1, vcc
 ; GFX900-GISEL-NEXT: v_ldexp_f32 v6, v6, v8
 ; GFX900-GISEL-NEXT: v_mul_f32_e32 v8, s3, v2
 ; GFX900-GISEL-NEXT: v_fma_f32 v2, s3, v2, -v8
@@ -1889,17 +1889,17 @@ define amdgpu_kernel void @s_exp_v4f32(ptr addrspace(1) %out, <4 x float> %in) {
 ; GFX900-GISEL-NEXT: v_add_f32_e32 v2, v8, v2
 ; GFX900-GISEL-NEXT: v_cvt_i32_f32_e32 v3, v3
 ; GFX900-GISEL-NEXT: v_exp_f32_e32 v8, v2
-; GFX900-GISEL-NEXT: v_cmp_gt_f32_e32 vcc, s1, v5
-; GFX900-GISEL-NEXT: v_cndmask_b32_e32 v1, v1, v7, vcc
-; GFX900-GISEL-NEXT: v_cmp_lt_f32_e32 vcc, s2, v4
-; GFX900-GISEL-NEXT: v_cndmask_b32_e64 v2, v6, 0, vcc
-; GFX900-GISEL-NEXT: v_cmp_gt_f32_e32 vcc, s2, v5
-; GFX900-GISEL-NEXT: v_cndmask_b32_e32 v2, v2, v7, vcc
+; GFX900-GISEL-NEXT: v_cmp_le_f32_e32 vcc, s1, v5
+; GFX900-GISEL-NEXT: v_cndmask_b32_e32 v1, v7, v1, vcc
+; GFX900-GISEL-NEXT: v_cmp_ge_f32_e32 vcc, s2, v4
+; GFX900-GISEL-NEXT: v_cndmask_b32_e32 v2, 0, v6, vcc
+; GFX900-GISEL-NEXT: v_cmp_le_f32_e32 vcc, s2, v5
+; GFX900-GISEL-NEXT: v_cndmask_b32_e32 v2, v7, v2, vcc
 ; GFX900-GISEL-NEXT: v_ldexp_f32 v3, v8, v3
-; GFX900-GISEL-NEXT: v_cmp_lt_f32_e32 vcc, s3, v4
-; GFX900-GISEL-NEXT: v_cndmask_b32_e64 v3, v3, 0, vcc
-; GFX900-GISEL-NEXT: v_cmp_gt_f32_e32 vcc, s3, v5
-; GFX900-GISEL-NEXT: v_cndmask_b32_e32 v3, v3, v7, vcc
+; GFX900-GISEL-NEXT: v_cmp_ge_f32_e32 vcc, s3, v4
+; GFX900-GISEL-NEXT: v_cndmask_b32_e32 v3, 0, v3, vcc
+; GFX900-GISEL-NEXT: v_cmp_le_f32_e32 vcc, s3, v5
+; GFX900-GISEL-NEXT: v_cndmask_b32_e32 v3, v7, v3, vcc
 ; GFX900-GISEL-NEXT: v_mov_b32_e32 v4, 0
 ; GFX900-GISEL-NEXT: global_store_dwordx4 v4, v[0:3], s[6:7]
 ; GFX900-GISEL-NEXT: s_endpgm
@@ -1989,7 +1989,7 @@ define amdgpu_kernel void @s_exp_v4f32(ptr addrspace(1) %out, <4 x float> %in) {
 ; SI-GISEL-NEXT: v_cvt_i32_f32_e32 v1, v4
 ; SI-GISEL-NEXT: v_exp_f32_e32 v0, v0
 ; SI-GISEL-NEXT: v_mov_b32_e32 v4, 0xc2ce8ed0
-; SI-GISEL-NEXT: v_cmp_lt_f32_e32 vcc, s0, v4
+; SI-GISEL-NEXT: v_cmp_ge_f32_e32 vcc, s0, v4
 ; SI-GISEL-NEXT: s_mov_b32 s6, -1
 ; SI-GISEL-NEXT: v_ldexp_f32_e32 v0, v0, v1
 ; SI-GISEL-NEXT: v_mul_f32_e32 v1, s1, v2
@@ -2000,9 +2000,9 @@ define amdgpu_kernel void @s_exp_v4f32(ptr addrspace(1) %out, <4 x float> %in) {
 ; SI-GISEL-NEXT: v_add_f32_e32 v1, v1, v6
 ; SI-GISEL-NEXT: v_cvt_i32_f32_e32 v6, v7
 ; SI-GISEL-NEXT: v_exp_f32_e32 v1, v1
-; SI-GISEL-NEXT: v_cndmask_b32_e64 v0, v0, 0, vcc
+; SI-GISEL-NEXT: v_cndmask_b32_e32 v0, 0, v0, vcc
 ; SI-GISEL-NEXT: v_mov_b32_e32 v7, 0x7f800000
-; SI-GISEL-NEXT: v_cmp_gt_f32_e32 vcc, s0, v5
+; SI-GISEL-NEXT: v_cmp_le_f32_e32 vcc, s0, v5
 ; SI-GISEL-NEXT: v_ldexp_f32_e32 v1, v1, v6
 ; SI-GISEL-NEXT: v_mul_f32_e32 v6, s2, v2
 ; SI-GISEL-NEXT: v_fma_f32 v8, s2, v2, -v6
@@ -2012,9 +2012,9 @@ define amdgpu_kernel void @s_exp_v4f32(ptr addrspace(1) %out, <4 x float> %in) {
 ; SI-GISEL-NEXT: v_add_f32_e32 v6, v6, v8
 ; SI-GISEL-NEXT: v_cvt_i32_f32_e32 v8, v9
 ; SI-GISEL-NEXT: v_exp_f32_e32 v6, v6
-; SI-GISEL-NEXT: v_cndmask_b32_e32 v0, v0, v7, vcc
-; SI-GISEL-NEXT: v_cmp_lt_f32_e32 vcc, s1, v4
-; SI-GISEL-NEXT: v_cndmask_b32_e64 v1, v1, 0, vcc
+; SI-GISEL-NEXT: v_cndmask_b32_e32 v0, v7, v0, vcc
+; SI-GISEL-NEXT: v_cmp_ge_f32_e32 vcc, s1, v4
+; SI-GISEL-NEXT: v_cndmask_b32_e32 v1, 0, v1, vcc
 ; SI-GISEL-NEXT: v_ldexp_f32_e32 v6, v6, v8
 ; SI-GISEL-NEXT: v_mul_f32_e32 v8, s3, v2
 ; SI-GISEL-NEXT: v_fma_f32 v2, s3, v2, -v8
@@ -2024,17 +2024,17 @@ define amdgpu_kernel void @s_exp_v4f32(ptr addrspace(1) %out, <4 x float> %in) {
 ; SI-GISEL-NEXT: v_add_f32_e32 v2, v8, v2
 ; SI-GISEL-NEXT: v_cvt_i32_f32_e32 v3, v3
 ; SI-GISEL-NEXT: v_exp_f32_e32 v8, v2
-; SI-GISEL-NEXT: v_cmp_gt_f32_e32 vcc, s1, v5
-; SI-GISEL-NEXT: v_cndmask_b32_e32 v1, v1, v7, vcc
-; SI-GISEL-NEXT: v_cmp_lt_f32_e32 vcc, s2, v4
-; SI-GISEL-NEXT: v_cndmask_b32_e64 v2, v6, 0, vcc
-; SI-GISEL-NEXT: v_cmp_gt_f32_e32 vcc, s2, v5
-; SI-GISEL-NEXT: v_cndmask_b32_e32 v2, v2, v7, vcc
+; SI-GISEL-NEXT: v_cmp_le_f32_e32 vcc, s1, v5
+; SI-GISEL-NEXT: v_cndmask_b32_e32 v1, v7, v1, vcc
+; SI-GISEL-NEXT: v_cmp_ge_f32_e32 vcc, s2, v4
+; SI-GISEL-NEXT: v_cndmask_b32_e32 v2, 0, v6, vcc
+; SI-GISEL-NEXT: v_cmp_le_f32_e32 vcc, s2, v5
+; SI-GISEL-NEXT: v_cndmask_b32_e32 v2, v7, v2, vcc
 ; SI-GISEL-NEXT: v_ldexp_f32_e32 v3, v8, v3
-; SI-GISEL-NEXT: v_cmp_lt_f32_e32 vcc, s3, v4
-; SI-GISEL-NEXT: v_cndmask_b32_e64 v3, v3, 0, vcc
-; SI-GISEL-NEXT: v_cmp_gt_f32_e32 vcc, s3, v5
-; SI-GISEL-NEXT: v_cndmask_b32_e32 v3, v3, v7, vcc
+; SI-GISEL-NEXT: v_cmp_ge_f32_e32 vcc, s3, v4
+; SI-GISEL-NEXT: v_cndmask_b32_e32 v3, 0, v3, vcc
+; SI-GISEL-NEXT: v_cmp_le_f32_e32 vcc, s3, v5
+; SI-GISEL-NEXT: v_cndmask_b32_e32 v3, v7, v3, vcc
 ; SI-GISEL-NEXT: s_mov_b32 s7, 0xf000
 ; SI-GISEL-NEXT: buffer_store_dwordx4 v[0:3], off, s[4:7], 0
 ; SI-GISEL-NEXT: s_endpgm
@@ -2565,11 +2565,11 @@ define float @v_exp_f32(float %in) {
 ; VI-GISEL-NEXT: v_mov_b32_e32 v3, 0x7f800000
 ; VI-GISEL-NEXT: v_ldexp_f32 v1, v1, v2
 ; VI-GISEL-NEXT: v_mov_b32_e32 v2, 0xc2ce8ed0
-; VI-GISEL-NEXT: v_cmp_lt_f32_e32 vcc, v0, v2
+; VI-GISEL-NEXT: v_cmp_ge_f32_e32 vcc, v0, v2
 ; VI-GISEL-NEXT: v_mov_b32_e32 v2, 0x42b17218
-; VI-GISEL-NEXT: v_cndmask_b32_e64 v1, v1, 0, vcc
-; VI-GISEL-NEXT: v_cmp_gt_f32_e32 vcc, v0, v2
-; VI-GISEL-NEXT: v_cndmask_b32_e32 v0, v1, v3, vcc
+; VI-GISEL-NEXT: v_cndmask_b32_e32 v1, 0, v1, vcc
+; VI-GISEL-NEXT: v_cmp_le_f32_e32 vcc, v0, v2
+; VI-GISEL-NEXT: v_cndmask_b32_e32 v0, v3, v1, vcc
 ; VI-GISEL-NEXT: s_setpc_b64 s[30:31]
 ;
 ; GFX900-SDAG-LABEL: v_exp_f32:
@@ -2611,11 +2611,11 @@ define float @v_exp_f32(float %in) {
 ; GFX900-GISEL-NEXT: v_mov_b32_e32 v3, 0x7f800000
 ; GFX900-GISEL-NEXT: v_ldexp_f32 v1, v1, v2
 ; GFX900-GISEL-NEXT: v_mov_b32_e32 v2, 0xc2ce8ed0
-; GFX900-GISEL-NEXT: v_cmp_lt_f32_e32 vcc, v0, v2
+; GFX900-GISEL-NEXT: v_cmp_ge_f32_e32 vcc, v0, v2
 ; GFX900-GISEL-NEXT: v_mov_b32_e32 v2, 0x42b17218
-; GFX900-GISEL-NEXT: v_cndmask_b32_e64 v1, v1, 0, vcc
-; GFX900-GISEL-NEXT: v_cmp_gt_f32_e32 vcc, v0, v2
-; GFX900-GISEL-NEXT: v_cndmask_b32_e32 v0, v1, v3, vcc
+; GFX900-GISEL-NEXT: v_cndmask_b32_e32 v1, 0, v1, vcc
+; GFX900-GISEL-NEXT: v_cmp_le_f32_e32 vcc, v0, v2
+; GFX900-GISEL-NEXT: v_cndmask_b32_e32 v0, v3, v1, vcc
 ; GFX900-GISEL-NEXT: s_setpc_b64 s[30:31]
 ;
 ; SI-SDAG-LABEL: v_exp_f32:
@@ -2657,11 +2657,11 @@ define float @v_exp_f32(float %in) {
 ; SI-GISEL-NEXT: v_mov_b32_e32 v3, 0x7f800000
 ; SI-GISEL-NEXT: v_ldexp_f32_e32 v1, v1, v2
 ; SI-GISEL-NEXT: v_mov_b32_e32 v2, 0xc2ce8ed0
-; SI-GISEL-NEXT: v_cmp_lt_f32_e32 vcc, v0, v2
+; SI-GISEL-NEXT: v_cmp_ge_f32_e32 vcc, v0, v2
 ; SI-GISEL-NEXT: v_mov_b32_e32 v2, 0x42b17218
-; SI-GISEL-NEXT: v_cndmask_b32_e64 v1, v1, 0, vcc
-; SI-GISEL-NEXT: v_cmp_gt_f32_e32 vcc, v0, v2
-; SI-GISEL-NEXT: v_cndmask_b32_e32 v0, v1, v3, vcc
+; SI-GISEL-NEXT: v_cndmask_b32_e32 v1, 0, v1, vcc
+; SI-GISEL-NEXT: v_cmp_le_f32_e32 vcc, v0, v2
+; SI-GISEL-NEXT: v_cndmask_b32_e32 v0, v3, v1, vcc
 ; SI-GISEL-NEXT: s_setpc_b64 s[30:31]
 ;
 ; R600-LABEL: v_exp_f32:
@@ -2725,11 +2725,11 @@ define float @v_exp_fabs_f32(float %in) {
 ; VI-GISEL-NEXT: v_mov_b32_e32 v3, 0x7f800000
 ; VI-GISEL-NEXT: v_ldexp_f32 v1, v1, v2
 ; VI-GISEL-NEXT: v_mov_b32_e32 v2, 0xc2ce8ed0
-; VI-GISEL-NEXT: v_cmp_lt_f32_e64 s[4:5], |v0|, v2
+; VI-GISEL-NEXT: v_cmp_ge_f32_e64 vcc, |v0|, v2
 ; VI-GISEL-NEXT: v_mov_b32_e32 v2, 0x42b17218
-; VI-GISEL-NEXT: v_cndmask_b32_e64 v1, v1, 0, s[4:5]
-; VI-GISEL-NEXT: v_cmp_gt_f32_e64 vcc, |v0|, v2
-; VI-GISEL-NEXT: v_cndmask_b32_e32 v0, v1, v3, vcc
+; VI-GISEL-NEXT: v_cndmask_b32_e32 v1, 0, v1, vcc
+; VI-GISEL-NEXT: v_cmp_le_f32_e64 vcc, |v0|, v2
+; VI-GISEL-NEXT: v_cndmask_b32_e32 v0, v3, v1, vcc
 ; VI-GISEL-NEXT: s_setpc_b64 s[30:31]
 ;
 ; GFX900-SDAG-LABEL: v_exp_fabs_f32:
@@ -2771,11 +2771,11 @@ define float @v_exp_fabs_f32(float %in) {
 ; GFX900-GISEL-NEXT: v_mov_b32_e32 v3, 0x7f800000
 ; GFX900-GISEL-NEXT: v_ldexp_f32 v1, v1, v2
 ; GFX900-GISEL-NEXT: v_mov_b32_e32 v2, 0xc2ce8ed0
-; GFX900-GISEL-NEXT: v_cmp_lt_f32_e64 s[4:5], |v0|, v2
+; GFX900-GISEL-NEXT: v_cmp_ge_f32_e64 vcc, |v0|, v2
 ; GFX900-GISEL-NEXT: v_mov_b32_e32 v2, 0x42b17218
-; GFX900-GISEL-NEXT: v_cndmask_b32_e64 v1, v1, 0, s[4:5]
-; GFX900-GISEL-NEXT: v_cmp_gt_f32_e64 vcc, |v0|, v2
-; GFX900-GISEL-NEXT: v_cndmask_b32_e32 v0, v1, v3, vcc
+; GFX900-GISEL-NEXT: v_cndmask_b32_e32 v1, 0, v1, vcc
+; GFX900-GISEL-NEXT: v_cmp_le_f32_e64 vcc, |v0|, v2
+; GFX900-GISEL-NEXT: v_cndmask_b32_e32 v0, v3, v1, vcc
 ; GFX900-GISEL-NEXT: s_setpc_b64 s[30:31]
 ;
 ; SI-SDAG-LABEL: v_exp_fabs_f32:
@@ -2817,11 +2817,11 @@ define float @v_exp_fabs_f32(float %in) {
 ; SI-GISEL-NEXT: v_mov_b32_e32 v3, 0x7f800000
 ; SI-GISEL-NEXT: v_ldexp_f32_e32 v1, v1, v2
 ; SI-GISEL-NEXT: v_mov_b32_e32 v2, 0xc2ce8ed0
-; SI-GISEL-NEXT: v_cmp_lt_f32_e64 s[4:5], |v0|, v2
+; SI-GISEL-NEXT: v_cmp_ge_f32_e64 vcc, |v0|, v2
 ; SI-GISEL-NEXT: v_mov_b32_e32 v2, 0x42b17218
-; SI-GISEL-NEXT: v_cndmask_b32_e64 v1, v1, 0, s[4:5]
-; SI-GISEL-NEXT: v_cmp_gt_f32_e64 vcc, |v0|, v2
-; SI-GISEL-NEXT: v_cndmask_b32_e32 v0, v1, v3, vcc
+; SI-GISEL-NEXT: v_cndmask_b32_e32 v1, 0, v1, vcc
+; SI-GISEL-NEXT: v_cmp_le_f32_e64 vcc, |v0|, v2
+; SI-GISEL-NEXT: v_cndmask_b32_e32 v0, v3, v1, vcc
 ; SI-GISEL-NEXT: s_setpc_b64 s[30:31]
 ;
 ; R600-LABEL: v_exp_fabs_f32:
@@ -2886,11 +2886,11 @@ define float @v_exp_fneg_fabs_f32(float %in) {
 ; VI-GISEL-NEXT: v_mov_b32_e32 v3, 0x7f800000
 ; VI-GISEL-NEXT: v_ldexp_f32 v1, v1, v2
 ; VI-GISEL-NEXT: v_mov_b32_e32 v2, 0xc2ce8ed0
-; VI-GISEL-NEXT: v_cmp_lt_f32_e64 s[4:5], -|v0|, v2
+; VI-GISEL-NEXT: v_cmp_ge_f32_e64 vcc, -|v0|, v2
 ; VI-GISEL-NEXT: v_mov_b32_e32 v2, 0x42b17218
-; VI-GISEL-NEXT: v_cndmask_b32_e64 v1, v1, 0, s[4:5]
-; VI-GISEL-NEXT: v_cmp_gt_f32_e64 vcc, -|v0|, v2
-; VI-GISEL-NEXT: v_cndmask_b32_e32 v0, v1, v3, vcc
+; VI-GISEL-NEXT: v_cndmask_b32_e32 v1, 0, v1, vcc
+; VI-GISEL-NEXT: v_cmp_le_f32_e64 vcc, -|v0|, v2
+; VI-GISEL-NEXT: v_cndmask_b32_e32 v0, v3, v1, vcc
 ; VI-GISEL-NEXT: s_setpc_b64 s[30:31]
 ;
 ; GFX900-SDAG-LABEL: v_exp_fneg_fabs_f32:
@@ -2932,11 +2932,11 @@ define float @v_exp_fneg_fabs_f32(float %in) {
 ; GFX900-GISEL-NEXT: v_mov_b32_e32 v3, 0x7f800000
 ; GFX900-GISEL-NEXT: v_ldexp_f32 v1, v1, v2
 ; GFX900-GISEL-NEXT: v_mov_b32_e32 v2, 0xc2ce8ed0
-; GFX900-GISEL-NEXT: v_cmp_lt_f32_e64 s[4:5], -|v0|, v2
+; GFX900-GISEL-NEXT: v_cmp_ge_f32_e64 vcc, -|v0|, v2
 ; GFX900-GISEL-NEXT: v_mov_b32_e32 v2, 0x42b17218
-; GFX900-GISEL-NEXT: v_cndmask_b32_e64 v1, v1, 0, s[4:5]
-; GFX900-GISEL-NEXT: v_cmp_gt_f32_e64 vcc, -|v0|, v2
-; GFX900-GISEL-NEXT: v_cndmask_b32_e32 v0, v1, v3, vcc
+; GFX900-GISEL-NEXT: v_cndmask_b32_e32 v1, 0, v1, vcc
+; GFX900-GISEL-NEXT: v_cmp_le_f32_e64 vcc, -|v0|, v2
+; GFX900-GISEL-NEXT: v_cndmask_b32_e32 v0, v3, v1, vcc
 ; GFX900-GISEL-NEXT: s_setpc_b64 s[30:31]
 ;
 ; SI-SDAG-LABEL: v_exp_fneg_fabs_f32:
@@ -2978,11 +2978,11 @@ define float @v_exp_fneg_fabs_f32(float %in) {
 ; SI-GISEL-NEXT: v_mov_b32_e32 v3, 0x7f800000
 ; SI-GISEL-NEXT: v_ldexp_f32_e32 v1, v1, v2
 ; SI-GISEL-NEXT: v_mov_b32_e32 v2, 0xc2ce8ed0
-; SI-GISEL-NEXT: v_cmp_lt_f32_e64 s[4:5], -|v0|, v2
+; SI-GISEL-NEXT: v_cmp_ge_f32_e64 vcc, -|v0|, v2
 ; SI-GISEL-NEXT: v_mov_b32_e32 v2, 0x42b17218
-; SI-GISEL-NEXT: v_cndmask_b32_e64 v1, v1, 0, s[4:5]
-; SI-GISEL-NEXT: v_cmp_gt_f32_e64 vcc, -|v0|, v2
-; SI-GISEL-NEXT: v_cndmask_b32_e32 v0, v1, v3, vcc
+; SI-GISEL-NEXT: v_cndmask_b32_e32 v1, 0, v1, vcc
+; SI-GISEL-NEXT: v_cmp_le_f32_e64 vcc, -|v0|, v2
+; SI-GISEL-NEXT: v_cndmask_b32_e32 v0, v3, v1, vcc
 ; SI-GISEL-NEXT: s_setpc_b64 s[30:31]
 ;
 ; R600-LABEL: v_exp_fneg_fabs_f32:
@@ -3048,11 +3048,11 @@ define float @v_exp_fneg_f32(float %in) {
 ; VI-GISEL-NEXT: v_mov_b32_e32 v3, 0x7f800000
 ; VI-GISEL-NEXT: v_ldexp_f32 v1, v1, v2
 ; VI-GISEL-NEXT: v_mov_b32_e32 v2, 0xc2ce8ed0
-; VI-GISEL-NEXT: v_cmp_lt_f32_e64 s[4:5], -v0, v2
+; VI-GISEL-NEXT: v_cmp_ge_f32_e64 vcc, -v0, v2
 ; VI-GISEL-NEXT: v_mov_b32_e32 v2, 0x42b17218
-; VI-GISEL-NEXT: v_cndmask_b32_e64 v1, v1, 0, s[4:5]
-; VI-GISEL-NEXT: v_cmp_gt_f32_e64 vcc, -v0, v2
-; VI-GISEL-NEXT: v_cndmask_b32_e32 v0, v1, v3, vcc
+; VI-GISEL-NEXT: v_cndmask_b32_e32 v1, 0, v1, vcc
+; VI-GISEL-NEXT: v_cmp_le_f32_e64 vcc, -v0, v2
+; VI-GISEL-NEXT: v_cndmask_b32_e32 v0, v3, v1, vcc
 ; VI-GISEL-NEXT: s_setpc_b64 s[30:31]
 ;
 ; GFX900-SDAG-LABEL: v_exp_fneg_f32:
@@ -3094,11 +3094,11 @@ define float @v_exp_fneg_f32(float %in) {
 ; GFX900-GISEL-NEXT: v_mov_b32_e32 v3, 0x7f800000
 ; GFX900-GISEL-NEXT: v_ldexp_f32 v1, v1, v2
 ; GFX900-GISEL-NEXT: v_mov_b32_e32 v2, 0xc2ce8ed0
-; GFX900-GISEL-NEXT: v_cmp_lt_f32_e64 s[4:5], -v0, v2
+; GFX900-GISEL-NEXT: v_cmp_ge_f32_e64 vcc, -v0, v2
 ; GFX900-GISEL-NEXT: v_mov_b32_e32 v2, 0x42b17218
-; GFX900-GISEL-NEXT: v_cndmask_b32_e64 v1, v1, 0, s[4:5]
-; GFX900-GISEL-NEXT: v_cmp_gt_f32_e64 vcc, -v0, v2
-; GFX900-GISEL-NEXT: v_cndmask_b32_e32 v0, v1, v3, vcc
+; GFX900-GISEL-NEXT: v_cndmask_b32_e32 v1, 0, v1, vcc
+; GFX900-GISEL-NEXT: v_cmp_le_f32_e64 vcc, -v0, v2
+; GFX900-GISEL-NEXT: v_cndmask_b32_e32 v0, v3, v1, vcc
 ; GFX900-GISEL-NEXT: s_setpc_b64 s[30:31]
 ;
 ; SI-SDAG-LABEL: v_exp_fneg_f32:
@@ -3140,11 +3140,11 @@ define float @v_exp_fneg_f32(float %in) {
 ; SI-GISEL-NEXT: v_mov_b32_e32 v3, 0x7f800000
 ; SI-GISEL-NEXT: v_ldexp_f32_e32 v1, v1, v2
 ; SI-GISEL-NEXT: v_mov_b32_e32 v2, 0xc2ce8ed0
-; SI-GISEL-NEXT: v_cmp_lt_f32_e64 s[4:5], -v0, v2
+; SI-GISEL-NEXT: v_cmp_ge_f32_e64 vcc, -v0, v2
 ; SI-GISEL-NEXT: v_mov_b32_e32 v2, 0x42b17218
-; SI-GISEL-NEXT: v_cndmask_b32_e64 v1, v1, 0, s[4:5]
-; SI-GISEL-NEXT: v_cmp_gt_f32_e64 vcc, -v0, v2
-; SI-GISEL-NEXT: v_cndmask_b32_e32 v0, v1, v3, vcc
+; SI-GISEL-NEXT: v_cndmask_b32_e32 v1, 0, v1, vcc
+; SI-GISEL-NEXT: v_cmp_le_f32_e64 vcc, -v0, v2
+; SI-GISEL-NEXT: v_cndmask_b32_e32 v0, v3, v1, vcc
 ; SI-GISEL-NEXT: s_setpc_b64 s[30:31]
 ;
 ; R600-LABEL: v_exp_fneg_f32:
@@ -3400,8 +3400,8 @@ define float @v_exp_f32_ninf(float %in) {
 ; VI-GISEL-NEXT: v_exp_f32_e32 v1, v1
 ; VI-GISEL-NEXT: v_ldexp_f32 v1, v1, v2
 ; VI-GISEL-NEXT: v_mov_b32_e32 v2, 0xc2ce8ed0
-; VI-GISEL-NEXT: v_cmp_lt_f32_e32 vcc, v0, v2
-; VI-GISEL-NEXT: v_cndmask_b32_e64 v0, v1, 0, vcc
+; VI-GISEL-NEXT: v_cmp_ge_f32_e32 vcc, v0, v2
+; VI-GISEL-NEXT: v_cndmask_b32_e32 v0, 0, v1, vcc
 ; VI-GISEL-NEXT: s_setpc_b64 s[30:31]
 ;
 ; GFX900-SDAG-LABEL: v_exp_f32_ninf:
@@ -3438,8 +3438,8 @@ define float @v_exp_f32_ninf(float %in) {
 ; GFX900-GISEL-NEXT: v_exp_f32_e32 v1, v1
 ; GFX900-GISEL-NEXT: v_ldexp_f32 v1, v1, v2
 ; GFX900-GISEL-NEXT: v_mov_b32_e32 v2, 0xc2ce8ed0
-; GFX900-GISEL-NEXT: v_cmp_lt_f32_e32 vcc, v0, v2
-; GFX900-GISEL-NEXT: v_cndmask_b32_e64 v0, v1, 0, vcc
+; GFX900-GISEL-NEXT: v_cmp_ge_f32_e32 vcc, v0, v2
+; GFX900-GISEL-NEXT: v_cndmask_b32_e32 v0, 0, v1, vcc
 ; GFX900-GISEL-NEXT: s_setpc_b64 s[30:31]
 ;
 ; SI-SDAG-LABEL: v_exp_f32_ninf:
@@ -3476,8 +3476,8 @@ define float @v_exp_f32_ninf(float %in) {
 ; SI-GISEL-NEXT: v_exp_f32_e32 v1, v1
 ; SI-GISEL-NEXT: v_ldexp_f32_e32 v1, v1, v2
 ; SI-GISEL-NEXT: v_mov_b32_e32 v2, 0xc2ce8ed0
-; SI-GISEL-NEXT: v_cmp_lt_f32_e32 vcc, v0, v2
-; SI-GISEL-NEXT: v_cndmask_b32_e64 v0, v1, 0, vcc
+; SI-GISEL-NEXT: v_cmp_ge_f32_e32 vcc, v0, v2
+; SI-GISEL-NEXT: v_cndmask_b32_e32 v0, 0, v1, vcc
 ; SI-GISEL-NEXT: s_setpc_b64 s[30:31]
 ;
 ; R600-LABEL: v_exp_f32_ninf:
@@ -3770,11 +3770,11 @@ define float @v_exp_f32_daz(float %in) #0 {
 ; VI-GISEL-NEXT: v_mov_b32_e32 v3, 0x7f800000
 ; VI-GISEL-NEXT: v_ldexp_f32 v1, v1, v2
 ; VI-GISEL-NEXT: v_mov_b32_e32 v2, 0xc2ce8ed0
-; VI-GISEL-NEXT: v_cmp_lt_f32_e32 vcc, v0, v2
+; VI-GISEL-NEXT: v_cmp_ge_f32_e32 vcc, v0, v2
 ; VI-GISEL-NEXT: v_mov_b32_e32 v2, 0x42b17218
-; VI-GISEL-NEXT: v_cndmask_b32_e64 v1, v1, 0, vcc
-; VI-GISEL-NEXT: v_cmp_gt_f32_e32 vcc, v0, v2
-; VI-GISEL-NEXT: v_cndmask_b32_e32 v0, v1, v3, vcc
+; VI-GISEL-NEXT: v_cndmask_b32_e32 v1, 0, v1, vcc
+; VI-GISEL-NEXT: v_cmp_le_f32_e32 vcc, v0, v2
+; VI-GISEL-NEXT: v_cndmask_b32_e32 v0, v3, v1, vcc
 ; VI-GISEL-NEXT: s_setpc_b64 s[30:31]
 ;
 ; GFX900-SDAG-LABEL: v_exp_f32_daz:
@@ -3816,11 +3816,11 @@ define float @v_exp_f32_daz(float %in) #0 {
 ; GFX900-GISEL-NEXT: v_mov_b32_e32 v3, 0x7f800000
 ; GFX900-GISEL-NEXT: v_ldexp_f32 v1, v1, v2
 ; GFX900-GISEL-NEXT: v_mov_b32_e32 v2, 0xc2ce8ed0
-; GFX900-GISEL-NEXT: v_cmp_lt_f32_e32 vcc, v0, v2
+; GFX900-GISEL-NEXT: v_cmp_ge_f32_e32 vcc, v0, v2
 ; GFX900-GISEL-NEXT: v_mov_b32_e32 v2, 0x42b17218
-; GFX900-GISEL-NEXT: v_cndmask_b32_e64 v1, v1, 0, vcc
-; GFX900-GISEL-NEXT: v_cmp_gt_f32_e32 vcc, v0, v2
-; GFX900-GISEL-NEXT: v_cndmask_b32_e32 v0, v1, v3, vcc
+; GFX900-GISEL-NEXT: v_cndmask_b32_e32 v1, 0, v1, vcc
+; GFX900-GISEL-NEXT: v_cmp_le_f32_e32 vcc, v0, v2
+; GFX900-GISEL-NEXT: v_cndmask_b32_e32 v0, v3, v1, vcc
 ; GFX900-GISEL-NEXT: s_setpc_b64 s[30:31]
 ;
 ; SI-SDAG-LABEL: v_exp_f32_daz:
@@ -3862,11 +3862,11 @@ define float @v_exp_f32_daz(float %in) #0 {
 ; SI-GISEL-NEXT: v_mov_b32_e32 v3, 0x7f800000
 ; SI-GISEL-NEXT: v_ldexp_f32_e32 v1, v1, v2
 ; SI-GISEL-NEXT: v_mov_b32_e32 v2, 0xc2ce8ed0
-; SI-GISEL-NEXT: v_cmp_lt_f32_e32 vcc, v0, v2
+; SI-GISEL-NEXT: v_cmp_ge_f32_e32 vcc, v0, v2
 ; SI-GISEL-NEXT: v_mov_b32_e32 v2, 0x42b17218
-; SI-GISEL-NEXT: v_cndmask_b32_e64 v1, v1, 0, vcc
-; SI-GISEL-NEXT: v_cmp_gt_f32_e32 vcc, v0, v2
-; SI-GISEL-NEXT: v_cndmask_b32_e32 v0, v1, v3, vcc
+; SI-GISEL-NEXT: v_cndmask_b32_e32 v1, 0, v1, vcc
+; SI-GISEL-NEXT: v_cmp_le_f32_e32 vcc, v0, v2
+; SI-GISEL-NEXT: v_cndmask_b32_e32 v0, v3, v1, vcc
 ; SI-GISEL-NEXT: s_setpc_b64 s[30:31]
 ;
 ; R600-LABEL: v_exp_f32_daz:
@@ -3928,11 +3928,11 @@ define float @v_exp_f32_nnan(float %in) {
 ; VI-GISEL-NEXT: v_mov_b32_e32 v3, 0x7f800000
 ; VI-GISEL-NEXT: v_ldexp_f32 v1, v1, v2
 ; VI-GISEL-NEXT: v_mov_b32_e32 v2, 0xc2ce8ed0
-; VI-GISEL-NEXT: v_cmp_lt_f32_e32 vcc, v0, v2
+; VI-GISEL-NEXT: v_cmp_ge_f32_e32 vcc, v0, v2
 ; VI-GISEL-NEXT: v_mov_b32_e32 v2, 0x42b17218
-; VI-GISEL-NEXT: v_cndmask_b32_e64 v1, v1, 0, vcc
-; VI-GISEL-NEXT: v_cmp_gt_f32_e32 vcc, v0, v2
-; VI-GISEL-NEXT: v_cndmask_b32_e32 v0, v1, v3, vcc
+; VI-GISEL-NEXT: v_cndmask_b32_e32 v1, 0, v1, vcc
+; VI-GISEL-NEXT: v_cmp_le_f32_e32 vcc, v0, v2
+; VI-GISEL-NEXT: v_cndmask_b32_e32 v0, v3, v1, vcc
 ; VI-GISEL-NEXT: s_setpc_b64 s[30:31]
 ;
 ; GFX900-SDAG-LABEL: v_exp_f32_nnan:
@@ -3974,11 +3974,11 @@ define float @v_exp_f32_nnan(float %in) {
 ; GFX900-GISEL-NEXT: v_mov_b32_e32 v3, 0x7f800000
 ; GFX900-GISEL-NEXT: v_ldexp_f32 v1, v1, v2
 ; GFX900-GISEL-NEXT: v_mov_b32_e32 v2, 0xc2ce8ed0
-; GFX900-GISEL-NEXT: v_cmp_lt_f32_e32 vcc, v0, v2
+; GFX900-GISEL-NEXT: v_cmp_ge_f32_e32 vcc, v0, v2
 ; GFX900-GISEL-NEXT: v_mov_b32_e32 v2, 0x42b17218
-; GFX900-GISEL-NEXT: v_cndmask_b32_e64 v1, v1, 0, vcc
-; GFX900-GISEL-NEXT: v_cmp_gt_f32_e32 vcc, v0, v2
-; GFX900-GISEL-NEXT: v_cndmask_b32_e32 v0, v1, v3, vcc
+; GFX900-GISEL-NEXT: v_cndmask_b32_e32 v1, 0, v1, vcc
+; GFX900-GISEL-NEXT: v_cmp_le_f32_e32 vcc, v0, v2
+; GFX900-GISEL-NEXT: v_cndmask_b32_e32 v0, v3, v1, vcc
 ; GFX900-GISEL-NEXT: s_setpc_b64 s[30:31]
 ;
 ; SI-SDAG-LABEL: v_exp_f32_nnan:
@@ -4020,11 +4020,11 @@ define float @v_exp_f32_nnan(float %in) {
 ; SI-GISEL-NEXT: v_mov_b32_e32 v3, 0x7f800000
 ; SI-GISEL-NEXT: v_ldexp_f32_e32 v1, v1, v2
 ; SI-GISEL-NEXT: v_mov_b32_e32 v2, 0xc2ce8ed0
-; SI-GISEL-NEXT: v_cmp_lt_f32_e32 vcc, v0, v2
+; SI-GISEL-NEXT: v_cmp_ge_f32_e32 vcc, v0, v2
 ; SI-GISEL-NEXT: v_mov_b32_e32 v2, 0x42b17218
-; SI-GISEL-NEXT: v_cndmask_b32_e64 v1, v1, 0, vcc
-; SI-GISEL-NEXT: v_cmp_gt_f32_e32 vcc, v0, v2
-; SI-GISEL-NEXT: v_cndmask_b32_e32 v0, v1, v3, vcc
+; SI-GISEL-NEXT: v_cndmask_b32_e32 v1, 0, v1, vcc
+; SI-GISEL-NEXT: v_cmp_le_f32_e32 vcc, v0, v2
+; SI-GISEL-NEXT: v_cndmask_b32_e32 v0, v3, v1, vcc
 ; SI-GISEL-NEXT: s_setpc_b64 s[30:31]
 ;
 ; R600-LABEL: v_exp_f32_nnan:
@@ -4086,11 +4086,11 @@ define float @v_exp_f32_nnan_daz(float %in) #0 {
 ; VI-GISEL-NEXT: v_mov_b32_e32 v3, 0x7f800000
 ; VI-GISEL-NEXT: v_ldexp_f32 v1, v1, v2
 ; VI-GISEL-NEXT: v_mov_b32_e32 v2, 0xc2ce8ed0
-; VI-GISEL-NEXT: v_cmp_lt_f32_e32 vcc, v0, v2
+; VI-GISEL-NEXT: v_cmp_ge_f32_e32 vcc, v0, v2
 ; VI-GISEL-NEXT: v_mov_b32_e32 v2, 0x42b17218
-; VI-GISEL-NEXT: v_cndmask_b32_e64 v1, v1, 0, vcc
-; VI-GISEL-NEXT: v_cmp_gt_f32_e32 vcc, v0, v2
-; VI-GISEL-NEXT: v_cndmask_b32_e32 v0, v1, v3, vcc
+; VI-GISEL-NEXT: v_cndmask_b32_e32 v1, 0, v1, vcc
+; VI-GISEL-NEXT: v_cmp_le_f32_e32 vcc, v0, v2
+; VI-GISEL-NEXT: v_cndmask_b32_e32 v0, v3, v1, vcc
 ; VI-GISEL-NEXT: s_setpc_b64 s[30:31]
 ;
 ; GFX900-SDAG-LABEL: v_exp_f32_nnan_daz:
@@ -4132,11 +4132,11 @@ define float @v_exp_f32_nnan_daz(float %in) #0 {
 ; GFX900-GISEL-NEXT: v_mov_b32_e32 v3, 0x7f800000
 ; GFX900-GISEL-NEXT: v_ldexp_f32 v1, v1, v2
 ; GFX900-GISEL-NEXT: v_mov_b32_e32 v2, 0xc2ce8ed0
-; GFX900-GISEL-NEXT: v_cmp_lt_f32_e32 vcc, v0, v2
+; GFX900-GISEL-NEXT: v_cmp_ge_f32_e32 vcc, v0, v2
 ; GFX900-GISEL-NEXT: v_mov_b32_e32 v2, 0x42b17218
-; GFX900-GISEL-NEXT: v_cndmask_b32_e64 v1, v1, 0, vcc
-; GFX900-GISEL-NEXT: v_cmp_gt_f32_e32 vcc, v0, v2
-; GFX900-GISEL-NEXT: v_cndmask_b32_e32 v0, v1, v3, vcc
+; GFX900-GISEL-NEXT: v_cndmask_b32_e32 v1, 0, v1, vcc
+; GFX900-GISEL-NEXT: v_cmp_le_f32_e32 vcc, v0, v2
+; GFX900-GISEL-NEXT: v_cndmask_b32_e32 v0, v3, v1, vcc
 ; GFX900-GISEL-NEXT: s_setpc_b64 s[30:31]
 ;
 ; SI-SDAG-LABEL: v_exp_f32_nnan_daz:
@@ -4178,11 +4178,11 @@ define float @v_exp_f32_nnan_daz(float %in) #0 {
 ; SI-GISEL-NEXT: v_mov_b32_e32 v3, 0x7f800000
 ; SI-GISEL-NEXT: v_ldexp_f32_e32 v1, v1, v2
 ; SI-GISEL-NEXT: v_mov_b32_e32 v2, 0xc2ce8ed0
-; SI-GISEL-NEXT: v_cmp_lt_f32_e32 vcc, v0, v2
+; SI-GISEL-NEXT: v_cmp_ge_f32_e32 vcc, v0, v2
 ; SI-GISEL-NEXT: v_mov_b32_e32 v2, 0x42b17218
-; SI-GISEL-NEXT: v_cndmask_b32_e64 v1, v1, 0, vcc
-; SI-GISEL-NEXT: v_cmp_gt_f32_e32 vcc, v0, v2
-; SI-GISEL-NEXT: v_cndmask_b32_e32 v0, v1, v3, vcc
+; SI-GISEL-NEXT: v_cndmask_b32_e32 v1, 0, v1, vcc
+; SI-GISEL-NEXT: v_cmp_le_f32_e32 vcc, v0, v2
+; SI-GISEL-NEXT: v_cndmask_b32_e32 v0, v3, v1, vcc
 ; SI-GISEL-NEXT: s_setpc_b64 s[30:31]
 ;
 ; R600-LABEL: v_exp_f32_nnan_daz:
@@ -4244,11 +4244,11 @@ define float @v_exp_f32_nnan_dynamic(float %in) #1 {
 ; VI-GISEL-NEXT: v_mov_b32_e32 v3, 0x7f800000
 ; VI-GISEL-NEXT: v_ldexp_f32 v1, v1, v2
 ; VI-GISEL-NEXT: v_mov_b32_e32 v2, 0xc2ce8ed0
-; VI-GISEL-NEXT: v_cmp_lt_f32_e32 vcc, v0, v2
+; VI-GISEL-NEXT: v_cmp_ge_f32_e32 vcc, v0, v2
 ; VI-GISEL-NEXT: v_mov_b32_e32 v2, 0x42b17218
-; VI-GISEL-NEXT: v_cndmask_b32_e64 v1, v1, 0, vcc
-; VI-GISEL-NEXT: v_cmp_gt_f32_e32 vcc, v0, v2
-; VI-GISEL-NEXT: v_cndmask_b32_e32 v0, v1, v3, vcc
+; VI-GISEL-NEXT: v_cndmask_b32_e32 v1, 0, v1, vcc
+; VI-GISEL-NEXT: v_cmp_le_f32_e32 vcc, v0, v2
+; VI-GISEL-NEXT: v_cndmask_b32_e32 v0, v3, v1, vcc
 ; VI-GISEL-NEXT: s_setpc_b64 s[30:31]
 ;
 ; GFX900-SDAG-LABEL: v_exp_f32_nnan_dynamic:
@@ -4290,11 +4290,11 @@ define float @v_exp_f32_nnan_dynamic(float %in) #1 {
 ; GFX900-GISEL-NEXT: v_mov_b32_e32 v3, 0x7f800000
 ; GFX900-GISEL-NEXT: v_ldexp_f32 v1, v1, v2
 ; GFX900-GISEL-NEXT: v_mov_b32_e32 v2, 0xc2ce8ed0
-; GFX900-GISEL-NEXT: v_cmp_lt_f32_e32 vcc, v0, v2
+; GFX900-GISEL-NEXT: v_cmp_ge_f32_e32 vcc, v0, v2
 ; GFX900-GISEL-NEXT: v_mov_b32_e32 v2, 0x42b17218
-; GFX900-GISEL-NEXT: v_cndmask_b32_e64 v1, v1, 0, vcc
-; GFX900-GISEL-NEXT: v_cmp_gt_f32_e32 vcc, v0, v2
-; GFX900-GISEL-NEXT: v_cndmask_b32_e32 v0, v1, v3, vcc
+; GFX900-GISEL-NEXT: v_cndmask_b32_e32 v1, 0, v1, vcc
+; GFX900-GISEL-NEXT: v_cmp_le_f32_e32 vcc, v0, v2
+; GFX900-GISEL-NEXT: v_cndmask_b32_e32 v0, v3, v1, vcc
 ; GFX900-GISEL-NEXT: s_setpc_b64 s[30:31]
 ;
 ; SI-SDAG-LABEL: v_exp_f32_nnan_dynamic:
@@ -4336,11 +4336,11 @@ define float @v_exp_f32_nnan_dynamic(float %in) #1 {
 ; SI-GISEL-NEXT: v_mov_b32_e32 v3, 0x7f800000
 ; SI-GISEL-NEXT: v_ldexp_f32_e32 v1, v1, v2
 ; SI-GISEL-NEXT: v_mov_b32_e32 v2, 0xc2ce8ed0
-; SI-GISEL-NEXT: v_cmp_lt_f32_e32 vcc, v0, v2
+; SI-GISEL-NEXT: v_cmp_ge_f32_e32 vcc, v0, v2
 ; SI-GISEL-NEXT: v_mov_b32_e32 v2, 0x42b17218
-; SI-GISEL-NEXT: v_cndmask_b32_e64 v1, v1, 0, vcc
-; SI-GISEL-NEXT: v_cmp_gt_f32_e32 vcc, v0, v2
-; SI-GISEL-NEXT: v_cndmask_b32_e32 v0, v1, v3, vcc
+; SI-GISEL-NEXT: v_cndmask_b32_e32 v1, 0, v1, vcc
+; SI-GISEL-NEXT: v_cmp_le_f32_e32 vcc, v0, v2
+; SI-GISEL-NEXT: v_cndmask_b32_e32 v0, v3, v1, vcc
 ; SI-GISEL-NEXT: s_setpc_b64 s[30:31]
 ;
 ; R600-LABEL: v_exp_f32_nnan_dynamic:
@@ -4397,8 +4397,8 @@ define float @v_exp_f32_ninf_daz(float %in) #0 {
 ; VI-GISEL-NEXT: v_exp_f32_e32 v1, v1
 ; VI-GISEL-NEXT: v_ldexp_f32 v1, v1, v2
 ; VI-GISEL-NEXT: v_mov_b32_e32 v2, 0xc2ce8ed0
-; VI-GISEL-NEXT: v_cmp_lt_f32_e32 vcc, v0, v2
-; VI-GISEL-NEXT: v_cndmask_b32_e64 v0, v1, 0, vcc
+; VI-GISEL-NEXT: v_cmp_ge_f32_e32 vcc, v0, v2
+; VI-GISEL-NEXT: v_cndmask_b32_e32 v0, 0, v1, vcc
 ; VI-GISEL-NEXT: s_setpc_b64 s[30:31]
 ;
 ; GFX900-SDAG-LABEL: v_exp_f32_ninf_daz:
@@ -4435,8 +4435,8 @@ define float @v_exp_f32_ninf_daz(float %in) #0 {
 ; GFX900-GISEL-NEXT: v_exp_f32_e32 v1, v1
 ; GFX900-GISEL-NEXT: v_ldexp_f32 v1, v1, v2
 ; GFX900-GISEL-NEXT: v_mov_b32_e32 v2, 0xc2ce8ed0
-; GFX900-GISEL-NEXT: v_cmp_lt_f32_e32 vcc, v0, v2
-; GFX900-GISEL-NEXT: v_cndmask_b32_e64 v0, v1, 0, vcc
+; GFX900-GISEL-NEXT: v_cmp_ge_f32_e32 vcc, v0, v2
+; GFX900-GISEL-NEXT: v_cndmask_b32_e32 v0, 0, v1, vcc
 ; GFX900-GISEL-NEXT: s_setpc_b64 s[30:31]
 ;
 ; SI-SDAG-LABEL: v_exp_f32_ninf_daz:
@@ -4473,8 +4473,8 @@ define float @v_exp_f32_ninf_daz(float %in) #0 {
 ; SI-GISEL-NEXT: v_exp_f32_e32 v1, v1
 ; SI-GISEL-NEXT: v_ldexp_f32_e32 v1, v1, v2
 ; SI-GISEL-NEXT: v_mov_b32_e32 v2, 0xc2ce8ed0
-; SI-GISEL-NEXT: v_cmp_lt_f32_e32 vcc, v0, v2
-; SI-GISEL-NEXT: v_cndmask_b32_e64 v0, v1, 0, vcc
+; SI-GISEL-NEXT: v_cmp_ge_f32_e32 vcc, v0, v2
+; SI-GISEL-NEXT: v_cndmask_b32_e32 v0, 0, v1, vcc
 ; SI-GISEL-NEXT: s_setpc_b64 s[30:31]
 ;
 ; R600-LABEL: v_exp_f32_ninf_daz:
@@ -4531,8 +4531,8 @@ define float @v_exp_f32_ninf_dynamic(float %in) #1 {
 ; VI-GISEL-NEXT: v_exp_f32_e32 v1, v1
 ; VI-GISEL-NEXT: v_ldexp_f32 v1, v1, v2
 ; VI-GISEL-NEXT: v_mov_b32_e32 v2, 0xc2ce8ed0
-; VI-GISEL-NEXT: v_cmp_lt_f32_e32 vcc, v0, v2
-; VI-GISEL-NEXT: v_cndmask_b32_e64 v0, v1, 0, vcc
+; VI-GISEL-NEXT: v_cmp_ge_f32_e32 vcc, v0, v2
+; VI-GISEL-NEXT: v_cndmask_b32_e32 v0, 0, v1, vcc
 ; VI-GISEL-NEXT: s_setpc_b64 s[30:31]
 ;
 ; GFX900-SDAG-LABEL: v_exp_f32_ninf_dynamic:
@@ -4569,8 +4569,8 @@ define float @v_exp_f32_ninf_dynamic(float %in) #1 {
 ; GFX900-GISEL-NEXT: v_exp_f32_e32 v1, v1
 ; GFX900-GISEL-NEXT: v_ldexp_f32 v1, v1, v2
 ; GFX900-GISEL-NEXT: v_mov_b32_e32 v2, 0xc2ce8ed0
-; GFX900-GISEL-NEXT: v_cmp_lt_f32_e32 vcc, v0, v2
-; GFX900-GISEL-NEXT: v_cndmask_b32_e64 v0, v1, 0, vcc
+; GFX900-GISEL-NEXT: v_cmp_ge_f32_e32 vcc, v0, v2
+; GFX900-GISEL-NEXT: v_cndmask_b32_e32 v0, 0, v1, vcc
 ; GFX900-GISEL-NEXT: s_setpc_b64 s[30:31]
 ;
 ; SI-SDAG-LABEL: v_exp_f32_ninf_dynamic:
@@ -4607,8 +4607,8 @@ define float @v_exp_f32_ninf_dynamic(float %in) #1 {
 ; SI-GISEL-NEXT: v_exp_f32_e32 v1, v1
 ; SI-GISEL-NEXT: v_ldexp_f32_e32 v1, v1, v2
 ; SI-GISEL-NEXT: v_mov_b32_e32 v2, 0xc2ce8ed0
-; SI-GISEL-NEXT: v_cmp_lt_f32_e32 vcc, v0, v2
-; SI-GISEL-NEXT: v_cndmask_b32_e64 v0, v1, 0, vcc
+; SI-GISEL-NEXT: v_cmp_ge_f32_e32 vcc, v0, v2
+; SI-GISEL-NEXT: v_cndmask_b32_e32 v0, 0, v1, vcc
 ; SI-GISEL-NEXT: s_setpc_b64 s[30:31]
 ;
 ; R600-LABEL: v_exp_f32_ninf_dynamic:
@@ -4665,8 +4665,8 @@ define float @v_exp_f32_nnan_ninf(float %in) {
 ; VI-GISEL-NEXT: v_exp_f32_e32 v1, v1
 ; VI-GISEL-NEXT: v_ldexp_f32 v1, v1, v2
 ; VI-GISEL-NEXT: v_mov_b32_e32 v2, 0xc2ce8ed0
-; VI-GISEL-NEXT: v_cmp_lt_f32_e32 vcc, v0, v2
-; VI-GISEL-NEXT: v_cndmask_b32_e64 v0, v1, 0, vcc
+; VI-GISEL-NEXT: v_cmp_ge_f32_e32 vcc, v0, v2
+; VI-GISEL-NEXT: v_cndmask_b32_e32 v0, 0, v1, vcc
 ; VI-GISEL-NEXT: s_setpc_b64 s[30:31]
 ;
 ; GFX900-SDAG-LABEL: v_exp_f32_nnan_ninf:
@@ -4703,8 +4703,8 @@ define float @v_exp_f32_nnan_ninf(float %in) {
 ; GFX900-GISEL-NEXT: v_exp_f32_e32 v1, v1
 ; GFX900-GISEL-NEXT: v_ldexp_f32 v1, v1, v2
 ; GFX900-GISEL-NEXT: v_mov_b32_e32 v2, 0xc2ce8ed0
-; GFX900-GISEL-NEXT: v_cmp_lt_f32_e32 vcc, v0, v2
-; GFX900-GISEL-NEXT: v_cndmask_b32_e64 v0, v1, 0, vcc
+; GFX900-GISEL-NEXT: v_cmp_ge_f32_e32 vcc, v0, v2
+; GFX900-GISEL-NEXT: v_cndmask_b32_e32 v0, 0, v1, vcc
 ; GFX900-GISEL-NEXT: s_setpc_b64 s[30:31]
 ;
 ; SI-SDAG-LABEL: v_exp_f32_nnan_ninf:
@@ -4741,8 +4741,8 @@ define float @v_exp_f32_nnan_ninf(float %in) {
 ; SI-GISEL-NEXT: v_exp_f32_e32 v1, v1
 ; SI-GISEL-NEXT: v_ldexp_f32_e32 v1, v1, v2
 ; SI-GISEL-NEXT: v_mov_b32_e32 v2, 0xc2ce8ed0
-; SI-GISEL-NEXT: v_cmp_lt_f32_e32 vcc, v0, v2
-; SI-GISEL-NEXT: v_cndmask_b32_e64 v0, v1, 0, vcc
+; SI-GISEL-NEXT: v_cmp_ge_f32_e32 vcc, v0, v2
+; SI-GISEL-NEXT: v_cndmask_b32_e32 v0, 0, v1, vcc
 ; SI-GISEL-NEXT: s_setpc_b64 s[30:31]
 ;
 ; R600-LABEL: v_exp_f32_nnan_ninf:
@@ -4799,8 +4799,8 @@ define float @v_exp_f32_nnan_ninf_daz(float %in) #0 {
 ; VI-GISEL-NEXT: v_exp_f32_e32 v1, v1
 ; VI-GISEL-NEXT: v_ldexp_f32 v1, v1, v2
 ; VI-GISEL-NEXT: v_mov_b32_e32 v2, 0xc2ce8ed0
-; VI-GISEL-NEXT: v_cmp_lt_f32_e32 vcc, v0, v2
-; VI-GISEL-NEXT: v_cndmask_b32_e64 v0, v1, 0, vcc
+; VI-GISEL-NEXT: v_cmp_ge_f32_e32 vcc, v0, v2
+; VI-GISEL-NEXT: v_cndmask_b32_e32 v0, 0, v1, vcc
 ; VI-GISEL-NEXT: s_setpc_b64 s[30:31]
 ;
 ; GFX900-SDAG-LABEL: v_exp_f32_nnan_ninf_daz:
@@ -4837,8 +4837,8 @@ define float @v_exp_f32_nnan_ninf_daz(float %in) #0 {
 ; GFX900-GISEL-NEXT: v_exp_f32_e32 v1, v1
 ; GFX900-GISEL-NEXT: v_ldexp_f32 v1, v1, v2
 ; GFX900-GISEL-NEXT: v_mov_b32_e32 v2, 0xc2ce8ed0
-; GFX900-GISEL-NEXT: v_cmp_lt_f32_e32 vcc, v0, v2
-; GFX900-GISEL-NEXT: v_cndmask_b32_e64 v0, v1, 0, vcc
+; GFX900-GISEL-NEXT: v_cmp_ge_f32_e32 vcc, v0, v2
+; GFX900-GISEL-NEXT: v_cndmask_b32_e32 v0, 0, v1, vcc
 ; GFX900-GISEL-NEXT: s_setpc_b64 s[30:31]
 ;
 ; SI-SDAG-LABEL: v_exp_f32_nnan_ninf_daz:
@@ -4875,8 +4875,8 @@ define float @v_exp_f32_nnan_ninf_daz(float %in) #0 {
 ; SI-GISEL-NEXT: v_exp_f32_e32 v1, v1
 ; SI-GISEL-NEXT: v_ldexp_f32_e32 v1, v1, v2
 ; SI-GISEL-NEXT: v_mov_b32_e32 v2, 0xc2ce8ed0
-; SI-GISEL-NEXT: v_cmp_lt_f32_e32 vcc, v0, v2
-; SI-GISEL-NEXT: v_cndmask_b32_e64 v0, v1, 0, vcc
+; SI-GISEL-NEXT: v_cmp_ge_f32_e32 vcc, v0, v2
+; SI-GISEL-NEXT: v_cndmask_b32_e32 v0, 0, v1, vcc
 ; SI-GISEL-NEXT: s_setpc_b64 s[30:31]
 ;
 ; R600-LABEL: v_exp_f32_nnan_ninf_daz:
@@ -4933,8 +4933,8 @@ define float @v_exp_f32_nnan_ninf_dynamic(float %in) #1 {
 ; VI-GISEL-NEXT: v_exp_f32_e32 v1, v1
 ; VI-GISEL-NEXT: v_ldexp_f32 v1, v1, v2
 ; VI-GISEL-NEXT: v_mov_b32_e32 v2, 0xc2ce8ed0
-; VI-GISEL-NEXT: v_cmp_lt_f32_e32 vcc, v0, v2
-; VI-GISEL-NEXT: v_cndmask_b32_e64 v0, v1, 0, vcc
+; VI-GISEL-NEXT: v_cmp_ge_f32_e32 vcc, v0, v2
+; VI-GISEL-NEXT: v_cndmask_b32_e32 v0, 0, v1, vcc
 ; VI-GISEL-NEXT: s_setpc_b64 s[30:31]
 ;
 ; GFX900-SDAG-LABEL: v_exp_f32_nnan_ninf_dynamic:
@@ -4971,8 +4971,8 @@ define float @v_exp_f32_nnan_ninf_dynamic(float %in) #1 {
 ; GFX900-GISEL-NEXT: v_exp_f32_e32 v1, v1
 ; GFX900-GISEL-NEXT: v_ldexp_f32 v1, v1, v2
 ; GFX900-GISEL-NEXT: v_mov_b32_e32 v2, 0xc2ce8ed0
-; GFX900-GISEL-NEXT: v_cmp_lt_f32_e32 vcc, v0, v2
-; GFX900-GISEL-NEXT: v_cndmask_b32_e64 v0, v1, 0, vcc
+; GFX900-GISEL-NEXT: v_cmp_ge_f32_e32 vcc, v0, v2
+; GFX900-GISEL-NEXT: v_cndmask_b32_e32 v0, 0, v1, vcc
 ; GFX900-GISEL-NEXT: s_setpc_b64 s[30:31]
 ;
 ; SI-SDAG-LABEL: v_exp_f32_nnan_ninf_dynamic:
@@ -5009,8 +5009,8 @@ define float @v_exp_f32_nnan_ninf_dynamic(float %in) #1 {
 ; SI-GISEL-NEXT: v_exp_f32_e32 v1, v1
 ; SI-GISEL-NEXT: v_ldexp_f32_e32 v1, v1, v2
 ; SI-GISEL-NEXT: v_mov_b32_e32 v2, 0xc2ce8ed0
-; SI-GISEL-NEXT: v_cmp_lt_f32_e32 vcc, v0, v2
-; SI-GISEL-NEXT: v_cndmask_b32_e64 v0, v1, 0, vcc
+; SI-GISEL-NEXT: v_cmp_ge_f32_e32 vcc, v0, v2
+; SI-GISEL-NEXT: v_cndmask_b32_e32 v0, 0, v1, vcc
 ; SI-GISEL-NEXT: s_setpc_b64 s[30:31]
 ;
 ; R600-LABEL: v_exp_f32_nnan_ninf_dynamic:
@@ -5100,11 +5100,11 @@ define float @v_exp_f32_dynamic_mode(float %in) #1 {
 ; VI-GISEL-NEXT: v_mov_b32_e32 v3, 0x7f800000
 ; VI-GISEL-NEXT: v_ldexp_f32 v1, v1, v2
 ; VI-GISEL-NEXT: v_mov_b32_e32 v2, 0xc2ce8ed0
-; VI-GISEL-NEXT: v_cmp_lt_f32_e32 vcc, v0, v2
+; VI-GISEL-NEXT: v_cmp_ge_f32_e32 vcc, v0, v2
 ; VI-GISEL-NEXT: v_mov_b32_e32 v2, 0x42b17218
-; VI-GISEL-NEXT: v_cndmask_b32_e64 v1, v1, 0, vcc
-; VI-GISEL-NEXT: v_cmp_gt_f32_e32 vcc, v0, v2
-; VI-GISEL-NEXT: v_cndmask_b32_e32 v0, v1, v3, vcc
+; VI-GISEL-NEXT: v_cndmask_b32_e32 v1, 0, v1, vcc
+; VI-GISEL-NEXT: v_cmp_le_f32_e32 vcc, v0, v2
+; VI-GISEL-NEXT: v_cndmask_b32_e32 v0, v3, v1, vcc
 ; VI-GISEL-NEXT: s_setpc_b64 s[30:31]
 ;
 ; GFX900-SDAG-LABEL: v_exp_f32_dynamic_mode:
@@ -5146,11 +5146,11 @@ define float @v_exp_f32_dynamic_mode(float %in) #1 {
 ; GFX900-GISEL-NEXT: v_mov_b32_e32 v3, 0x7f800000
 ; GFX900-GISEL-NEXT: v_ldexp_f32 v1, v1, v2
 ; GFX900-GISEL-NEXT: v_mov_b32_e32 v2, 0xc2ce8ed0
-; GFX900-GISEL-NEXT: v_cmp_lt_f32_e32 vcc, v0, v2
+; GFX900-GISEL-NEXT: v_cmp_ge_f32_e32 vcc, v0, v2
 ; GFX900-GISEL-NEXT: v_mov_b32_e32 v2, 0x42b17218
-; GFX900-GISEL-NEXT: v_cndmask_b32_e64 v1, v1, 0, vcc
-; GFX900-GISEL-NEXT: v_cmp_gt_f32_e32 vcc, v0, v2
-; GFX900-GISEL-NEXT: v_cndmask_b32_e32 v0, v1, v3, vcc
+; GFX900-GISEL-NEXT: v_cndmask_b32_e32 v1, 0, v1, vcc
+; GFX900-GISEL-NEXT: v_cmp_le_f32_e32 vcc, v0, v2
+; GFX900-GISEL-NEXT: v_cndmask_b32_e32 v0, v3, v1, vcc
 ; GFX900-GISEL-NEXT: s_setpc_b64 s[30:31]
 ;
 ; SI-SDAG-LABEL: v_exp_f32_dynamic_mode:
@@ -5192,11 +5192,11 @@ define float @v_exp_f32_dynamic_mode(float %in) #1 {
 ; SI-GISEL-NEXT: v_mov_b32_e32 v3, 0x7f800000
 ; SI-GISEL-NEXT: v_ldexp_f32_e32 v1, v1, v2
 ; SI-GISEL-NEXT: v_mov_b32_e32 v2, 0xc2ce8ed0
-; SI-GISEL-NEXT: v_cmp_lt_f32_e32 vcc, v0, v2
+; SI-GISEL-NEXT: v_cmp_ge_f32_e32 vcc, v0, v2
 ; SI-GISEL-NEXT: v_mov_b32_e32 v2, 0x42b17218
-; SI-GISEL-NEXT: v_cndmask_b32_e64 v1, v1, 0, vcc
-; SI-GISEL-NEXT: v_cmp_gt_f32_e32 vcc, v0, v2
-; SI-GISEL-NEXT: v_cndmask_b32_e32 v0, v1, v3, vcc
+; SI-GISEL-NEXT: v_cndmask_b32_e32 v1, 0, v1, vcc
+; SI-GISEL-NEXT: v_cmp_le_f32_e32 vcc, v0, v2
+; SI-GISEL-NEXT: v_cndmask_b32_e32 v0, v3, v1, vcc
 ; SI-GISEL-NEXT: s_setpc_b64 s[30:31]
 ;
 ; R600-LABEL: v_exp_f32_dynamic_mode:
@@ -5244,11 +5244,11 @@ define float @v_exp_f32_undef() {
 ; VI-GISEL-NEXT: v_mov_b32_e32 v2, 0x7f800000
 ; VI-GISEL-NEXT: v_ldexp_f32 v0, v0, v1
 ; VI-GISEL-NEXT: v_mov_b32_e32 v1, 0xc2ce8ed0
-; VI-GISEL-NEXT: v_cmp_lt_f32_e32 vcc, s4, v1
+; VI-GISEL-NEXT: v_cmp_ge_f32_e32 vcc, s4, v1
 ; VI-GISEL-NEXT: v_mov_b32_e32 v1, 0x42b17218
-; VI-GISEL-NEXT: v_cndmask_b32_e64 v0, v0, 0, vcc
-; VI-GISEL-NEXT: v_cmp_gt_f32_e32 vcc, s4, v1
-; VI-GISEL-NEXT: v_cndmask_b32_e32 v0, v0, v2, vcc
+; VI-GISEL-NEXT: v_cndmask_b32_e32 v0, 0, v0, vcc
+; VI-GISEL-NEXT: v_cmp_le_f32_e32 vcc, s4, v1
+; VI-GISEL-NEXT: v_cndmask_b32_e32 v0, v2, v0, vcc
 ; VI-GISEL-NEXT: s_setpc_b64 s[30:31]
 ;
 ; GFX900-SDAG-LABEL: v_exp_f32_undef:
@@ -5283,11 +5283,11 @@ define float @v_exp_f32_undef() {
 ; GFX900-GISEL-NEXT: v_mov_b32_e32 v2, 0x7f800000
 ; GFX900-GISEL-NEXT: v_ldexp_f32 v0, v0, v1
 ; GFX900-GISEL-NEXT: v_mov_b32_e32 v1, 0xc2ce8ed0
-; GFX900-GISEL-NEXT: v_cmp_lt_f32_e32 vcc, s4, v1
+; GFX900-GISEL-NEXT: v_cmp_ge_f32_e32 vcc, s4, v1
 ; GFX900-GISEL-NEXT: v_mov_b32_e32 v1, 0x42b17218
-; GFX900-GISEL-NEXT: v_cndmask_b32_e64 v0, v0, 0, vcc
-; GFX900-GISEL-NEXT: v_cmp_gt_f32_e32 vcc, s4, v1
-; GFX900-GISEL-NEXT: v_cndmask_b32_e32 v0, v0, v2, vcc
+; GFX900-GISEL-NEXT: v_cndmask_b32_e32 v0, 0, v0, vcc
+; GFX900-GISEL-NEXT: v_cmp_le_f32_e32 vcc, s4, v1
+; GFX900-GISEL-NEXT: v_cndmask_b32_e32 v0, v2, v0, vcc
 ; GFX900-GISEL-NEXT: s_setpc_b64 s[30:31]
 ;
 ; SI-SDAG-LABEL: v_exp_f32_undef:
@@ -5322,11 +5322,11 @@ define float @v_exp_f32_undef() {
 ; SI-GISEL-NEXT: v_mov_b32_e32 v2, 0x7f800000
 ; SI-GISEL-NEXT: v_ldexp_f32_e32 v0, v0, v1
 ; SI-GISEL-NEXT: v_mov_b32_e32 v1, 0xc2ce8ed0
-; SI-GISEL-NEXT: v_cmp_lt_f32_e32 vcc, s4, v1
+; SI-GISEL-NEXT: v_cmp_ge_f32_e32 vcc, s4, v1
 ; SI-GISEL-NEXT: v_mov_b32_e32 v1, 0x42b17218
-; SI-GISEL-NEXT: v_cndmask_b32_e64 v0, v0, 0, vcc
-; SI-GISEL-NEXT: v_cmp_gt_f32_e32 vcc, s4, v1
-; SI-GISEL-NEXT: v_cndmask_b32_e32 v0, v0, v2, vcc
+; SI-GISEL-NEXT: v_cndmask_b32_e32 v0, 0, v0, vcc
+; SI-GISEL-NEXT: v_cmp_le_f32_e32 vcc, s4, v1
+; SI-GISEL-NEXT: v_cndmask_b32_e32 v0, v2, v0, vcc
 ; SI-GISEL-NEXT: s_setpc_b64 s[30:31]
 ;
 ; R600-LABEL: v_exp_f32_undef:
@@ -5416,11 +5416,11 @@ define float @v_exp_f32_from_fpext_f16(i16 %src.i) {
 ; VI-GISEL-NEXT: v_mov_b32_e32 v3, 0x7f800000
 ; VI-GISEL-NEXT: v_ldexp_f32 v1, v1, v2
 ; VI-GISEL-NEXT: v_mov_b32_e32 v2, 0xc2ce8ed0
-; VI-GISEL-NEXT: v_cmp_lt_f32_e32 vcc, v0, v2
+; VI-GISEL-NEXT: v_cmp_ge_f32_e32 vcc, v0, v2
 ; VI-GISEL-NEXT: v_mov_b32_e32 v2, 0x42b17218
-; VI-GISEL-NEXT: v_cndmask_b32_e64 v1, v1, 0, vcc
-; VI-GISEL-NEXT: v_cmp_gt_f32_e32 vcc, v0, v2
-; VI-GISEL-NEXT: v_cndmask_b32_e32 v0, v1, v3, vcc
+; VI-GISEL-NEXT: v_cndmask_b32_e32 v1, 0, v1, vcc
+; VI-GISEL-NEXT: v_cmp_le_f32_e32 vcc, v0, v2
+; VI-GISEL-NEXT: v_cndmask_b32_e32 v0, v3, v1, vcc
 ; VI-GISEL-NEXT: s_setpc_b64 s[30:31]
 ;
 ; GFX900-SDAG-LABEL: v_exp_f32_from_fpext_f16:
@@ -5462,13 +5462,13 @@ define float @v_exp_f32_from_fpext_f16(i16 %src.i) {
 ; GFX900-GISEL-NEXT: v_cvt_i32_f32_e32 v2, v4
 ; GFX900-GISEL-NEXT: v_exp_f32_e32 v1, v1
 ; GFX900-GISEL-NEXT: v_mov_b32_e32 v3, 0xc2ce8ed0
-; GFX900-GISEL-NEXT: v_cmp_lt_f32_e32 vcc, v0, v3
+; GFX900-GISEL-NEXT: v_cmp_ge_f32_e32 vcc, v0, v3
 ; GFX900-GISEL-NEXT: v_mov_b32_e32 v3, 0x7f800000
 ; GFX900-GISEL-NEXT: v_ldexp_f32 v1, v1, v2
 ; GFX900-GISEL-NEXT: v_mov_b32_e32 v2, 0x42b17218
-; GFX900-GISEL-NEXT: v_cndmask_b32_e64 v1, v1, 0, vcc
-; GFX900-GISEL-NEXT: v_cmp_gt_f32_e32 vcc, v0, v2
-; GFX900-GISEL-NEXT: v_cndmask_b32_e32 v0, v1, v3, vcc
+; GFX900-GISEL-NEXT: v_cndmask_b32_e32 v1, 0, v1, vcc
+; GFX900-GISEL-NEXT: v_cmp_le_f32_e32 vcc, v0, v2
+; GFX900-GISEL-NEXT: v_cndmask_b32_e32 v0, v3, v1, vcc
 ; GFX900-GISEL-NEXT: s_setpc_b64 s[30:31]
 ;
 ; SI-SDAG-LABEL: v_exp_f32_from_fpext_f16:
@@ -5510,13 +5510,13 @@ define float @v_exp_f32_from_fpext_f16(i16 %src.i) {
 ; SI-GISEL-NEXT: v_cvt_i32_f32_e32 v2, v4
 ; SI-GISEL-NEXT: v_exp_f32_e32 v1, v1
 ; SI-GISEL-NEXT: v_mov_b32_e32 v3, 0xc2ce8ed0
-; SI-GISEL-NEXT: v_cmp_lt_f32_e32 vcc, v0, v3
+; SI-GISEL-NEXT: v_cmp_ge_f32_e32 vcc, v0, v3
 ; SI-GISEL-NEXT: v_mov_b32_e32 v3, 0x7f800000
 ; SI-GISEL-NEXT: v_ldexp_f32_e32 v1, v1, v2
 ; SI-GISEL-NEXT: v_mov_b32_e32 v2, 0x42b17218
-; SI-GISEL-NEXT: v_cndmask_b32_e64 v1, v1, 0, vcc
-; SI-GISEL-NEXT: v_cmp_gt_f32_e32 vcc, v0, v2
-; SI-GISEL-NEXT: v_cndmask_b32_e32 v0, v1, v3, vcc
+; SI-GISEL-NEXT: v_cndmask_b32_e32 v1, 0, v1, vcc
+; SI-GISEL-NEXT: v_cmp_le_f32_e32 vcc, v0, v2
+; SI-GISEL-NEXT: v_cndmask_b32_e32 v0, v3, v1, vcc
 ; SI-GISEL-NEXT: s_setpc_b64 s[30:31]
 ;
 ; R600-LABEL: v_exp_f32_from_fpext_f16:
@@ -5584,11 +5584,11 @@ define float @v_exp_f32_from_fpext_math_f16(i16 %src0.i, i16 %src1.i) {
 ; VI-GISEL-NEXT: v_mov_b32_e32 v3, 0x7f800000
 ; VI-GISEL-NEXT: v_ldexp_f32 v1, v1, v2
 ; VI-GISEL-NEXT: v_mov_b32_e32 v2, 0xc2ce8ed0
-; VI-GISEL-NEXT: v_cmp_lt_f32_e32 vcc, v0, v2
+; VI-GISEL-NEXT: v_cmp_ge_f32_e32 vcc, v0, v2
 ; VI-GISEL-NEXT: v_mov_b32_e32 v2, 0x42b17218
-; VI-GISEL-NEXT: v_cndmask_b32_e64 v1, v1, 0, vcc
-; VI-GISEL-NEXT: v_cmp_gt_f32_e32 vcc, v0, v2
-; VI-GISEL-NEXT: v_cndmask_b32_e32 v0, v1, v3, vcc
+; VI-GISEL-NEXT: v_cndmask_b32_e32 v1, 0, v1, vcc
+; VI-GISEL-NEXT: v_cmp_le_f32_e32 vcc, v0, v2
+; VI-GISEL-NEXT: v_cndmask_b32_e32 v0, v3, v1, vcc
 ; VI-GISEL-NEXT: s_setpc_b64 s[30:31]
 ;
 ; GFX900-SDAG-LABEL: v_exp_f32_from_fpext_math_f16:
@@ -5634,11 +5634,11 @@ define float @v_exp_f32_from_fpext_math_f16(i16 %src0.i, i16 %src1.i) {
 ; GFX900-GISEL-NEXT: v_mov_b32_e32 v3, 0x7f800000
 ; GFX900-GISEL-NEXT: v_ldexp_f32 v1, v1, v2
 ; GFX900-GISEL-NEXT: v_mov_b32_e32 v2, 0xc2ce8ed0
-; GFX900-GISEL-NEXT: v_cmp_lt_f32_e32 vcc, v0, v2
+; GFX900-GISEL-NEXT: v_cmp_ge_f32_e32 vcc, v0, v2
 ; GFX900-GISEL-NEXT: v_mov_b32_e32 v2, 0x42b17218
-; GFX900-GISEL-NEXT: v_cndmask_b32_e64 v1, v1, 0, vcc
-; GFX900-GISEL-NEXT: v_cmp_gt_f32_e32 vcc, v0, v2
-; GFX900-GISEL-NEXT: v_cndmask_b32_e32 v0, v1, v3, vcc
+; GFX900-GISEL-NEXT: v_cndmask_b32_e32 v1, 0, v1, vcc
+; GFX900-GISEL-NEXT: v_cmp_le_f32_e32 vcc, v0, v2
+; GFX900-GISEL-NEXT: v_cndmask_b32_e32 v0, v3, v1, vcc
 ; GFX900-GISEL-NEXT: s_setpc_b64 s[30:31]
 ;
 ; SI-SDAG-LABEL: v_exp_f32_from_fpext_math_f16:
@@ -5688,11 +5688,11 @@ define float @v_exp_f32_from_fpext_math_f16(i16 %src0.i, i16 %src1.i) {
 ; SI-GISEL-NEXT: v_exp_f32_e32 v1, v1
 ; SI-GISEL-NEXT: v_mov_b32_e32 v2, 0xc2ce8ed0
 ; SI-GISEL-NEXT: v_mov_b32_e32 v4, 0x42b17218
-; SI-GISEL-NEXT: v_cmp_lt_f32_e32 vcc, v0, v2
+; SI-GISEL-NEXT: v_cmp_ge_f32_e32 vcc, v0, v2
 ; SI-GISEL-NEXT: v_ldexp_f32_e32 v1, v1, v3
-; SI-GISEL-NEXT: v_cndmask_b32_e64 v1, v1, 0, vcc
-; SI-GISEL-NEXT: v_cmp_gt_f32_e32 vcc, v0, v4
-; SI-GISEL-NEXT: v_cndmask_b32_e32 v0, v1, v5, vcc
+; SI-GISEL-NEXT: v_cndmask_b32_e32 v1, 0, v1, vcc
+; SI-GISEL-NEXT: v_cmp_le_f32_e32 vcc, v0, v4
+; SI-GISEL-NEXT: v_cndmask_b32_e32 v0, v5, v1, vcc
 ; SI-GISEL-NEXT: s_setpc_b64 s[30:31]
 ;
 ; R600-LABEL: v_exp_f32_from_fpext_math_f16:
@@ -5905,11 +5905,11 @@ define float @v_exp_f32_from_fpext_math_f16_daz(i16 %src0.i, i16 %src1.i) #0 {
 ; VI-GISEL-NEXT: v_mov_b32_e32 v3, 0x7f800000
 ; VI-GISEL-NEXT: v_ldexp_f32 v1, v1, v2
 ; VI-GISEL-NEXT: v_mov_b32_e32 v2, 0xc2ce8ed0
-; VI-GISEL-NEXT: v_cmp_lt_f32_e32 vcc, v0, v2
+; VI-GISEL-NEXT: v_cmp_ge_f32_e32 vcc, v0, v2
 ; VI-GISEL-NEXT: v_mov_b32_e32 v2, 0x42b17218
-; VI-GISEL-NEXT: v_cndmask_b32_e64 v1, v1, 0, vcc
-; VI-GISEL-NEXT: v_cmp_gt_f32_e32 vcc, v0, v2
-; VI-GISEL-NEXT: v_cndmask_b32_e32 v0, v1, v3, vcc
+; VI-GISEL-NEXT: v_cndmask_b32_e32 v1, 0, v1, vcc
+; VI-GISEL-NEXT: v_cmp_le_f32_e32 vcc, v0, v2
+; VI-GISEL-NEXT: v_cndmask_b32_e32 v0, v3, v1, vcc
 ; VI-GISEL-NEXT: s_setpc_b64 s[30:31]
 ;
 ; GFX900-SDAG-LABEL: v_exp_f32_from_fpext_math_f16_daz:
@@ -5955,11 +5955,11 @@ define float @v_exp_f32_from_fpext_math_f16_daz(i16 %src0.i, i16 %src1.i) #0 {
 ; GFX900-GISEL-NEXT: v_mov_b32_e32 v3, 0x7f800000
 ; GFX900-GISEL-NEXT: v_ldexp_f32 v1, v1, v2
 ; GFX900-GISEL-NEXT: v_mov_b32_e32 v2, 0xc2ce8ed0
-; GFX900-GISEL-NEXT: v_cmp_lt_f32_e32 vcc, v0, v2
+; GFX900-GISEL-NEXT: v_cmp_ge_f32_e32 vcc, v0, v2
 ; GFX900-GISEL-NEXT: v_mov_b32_e32 v2, 0x42b17218
-; GFX900-GISEL-NEXT: v_cndmask_b32_e64 v1, v1, 0, vcc
-; GFX900-GISEL-NEXT: v_cmp_gt_f32_e32 vcc, v0, v2
-; GFX900-GISEL-NEXT: v_cndmask_b32_e32 v0, v1, v3, vcc
+; GFX900-GISEL-NEXT: v_cndmask_b32_e32 v1, 0, v1, vcc
+; GFX900-GISEL-NEXT: v_cmp_le_f32_e32 vcc, v0, v2
+; GFX900-GISEL-NEXT: v_cndmask_b32_e32 v0, v3, v1, vcc
 ; GFX900-GISEL-NEXT: s_setpc_b64 s[30:31]
 ;
 ; SI-SDAG-LABEL: v_exp_f32_from_fpext_math_f16_daz:
@@ -6009,11 +6009,11 @@ define float @v_exp_f32_from_fpext_math_f16_daz(i16 %src0.i, i16 %src1.i) #0 {
 ; SI-GISEL-NEXT: v_exp_f32_e32 v1, v1
 ; SI-GISEL-NEXT: v_mov_b32_e32 v2, 0xc2ce8ed0
 ; SI-GISEL-NEXT: v_mov_b32_e32 v4, 0x42b17218
-; SI-GISEL-NEXT: v_cmp_lt_f32_e32 vcc, v0, v2
+; SI-GISEL-NEXT: v_cmp_ge_f32_e32 vcc, v0, v2
 ; SI-GISEL-NEXT: v_ldexp_f32_e32 v1, v1, v3
-; SI-GISEL-NEXT: v_cndmask_b32_e64 v1, v1, 0, vcc
-; SI-GISEL-NEXT: v_cmp_gt_f32_e32 vcc, v0, v4
-; SI-GISEL-NEXT: v_cndmask_b32_e32 v0, v1, v5, vcc
+; SI-GISEL-NEXT: v_cndmask_b32_e32 v1, 0, v1, vcc
+; SI-GISEL-NEXT: v_cmp_le_f32_e32 vcc, v0, v4
+; SI-GISEL-NEXT: v_cndmask_b32_e32 v0, v5, v1, vcc
 ; SI-GISEL-NEXT: s_setpc_b64 s[30:31]
 ;
 ; R600-LABEL: v_exp_f32_from_fpext_math_f16_daz:
@@ -7032,11 +7032,11 @@ define float @v_exp_f32_contract(float %in) {
 ; VI-GISEL-NEXT: v_mov_b32_e32 v3, 0x7f800000
 ; VI-GISEL-NEXT: v_ldexp_f32 v1, v1, v2
 ; VI-GISEL-NEXT: v_mov_b32_e32 v2, 0xc2ce8ed0
-; VI-GISEL-NEXT: v_cmp_lt_f32_e32 vcc, v0, v2
+; VI-GISEL-NEXT: v_cmp_ge_f32_e32 vcc, v0, v2
 ; VI-GISEL-NEXT: v_mov_b32_e32 v2, 0x42b17218
-; VI-GISEL-NEXT: v_cndmask_b32_e64 v1, v1, 0, vcc
-; VI-GISEL-NEXT: v_cmp_gt_f32_e32 vcc, v0, v2
-; VI-GISEL-NEXT: v_cndmask_b32_e32 v0, v1, v3, vcc
+; VI-GISEL-NEXT: v_cndmask_b32_e32 v1, 0, v1, vcc
+; VI-GISEL-NEXT: v_cmp_le_f32_e32 vcc, v0, v2
+; VI-GISEL-NEXT: v_cndmask_b32_e32 v0, v3, v1, vcc
 ; VI-GISEL-NEXT: s_setpc_b64 s[30:31]
 ;
 ; GFX900-SDAG-LABEL: v_exp_f32_contract:
@@ -7078,11 +7078,11 @@ define float @v_exp_f32_contract(float %in) {
 ; GFX900-GISEL-NEXT: v_mov_b32_e32 v3, 0x7f800000
 ; GFX900-GISEL-NEXT: v_ldexp_f32 v1, v1, v2
 ; GFX900-GISEL-NEXT: v_mov_b32_e32 v2, 0xc2ce8ed0
-; GFX900-GISEL-NEXT: v_cmp_lt_f32_e32 vcc, v0, v2
+; GFX900-GISEL-NEXT: v_cmp_ge_f32_e32 vcc, v0, v2
 ; GFX900-GISEL-NEXT: v_mov_b32_e32 v2, 0x42b17218
-; GFX900-GISEL-NEXT: v_cndmask_b32_e64 v1, v1, 0, vcc
-; GFX900-GISEL-NEXT: v_cmp_gt_f32_e32 vcc, v0, v2
-; GFX900-GISEL-NEXT: v_cndmask_b32_e32 v0, v1, v3, vcc
+; GFX900-GISEL-NEXT: v_cndmask_b32_e32 v1, 0, v1, vcc
+; GFX900-GISEL-NEXT: v_cmp_le_f32_e32 vcc, v0, v2
+; GFX900-GISEL-NEXT: v_cndmask_b32_e32 v0, v3, v1, vcc
 ; GFX900-GISEL-NEXT: s_setpc_b64 s[30:31]
 ;
 ; SI-SDAG-LABEL: v_exp_f32_contract:
@@ -7124,11 +7124,11 @@ define float @v_exp_f32_contract(float %in) {
 ; SI-GISEL-NEXT: v_mov_b32_e32 v3, 0x7f800000
 ; SI-GISEL-NEXT: v_ldexp_f32_e32 v1, v1, v2
 ; SI-GISEL-NEXT: v_mov_b32_e32 v2, 0xc2ce8ed0
-; SI-GISEL-NEXT: v_cmp_lt_f32_e32 vcc, v0, v2
+; SI-GISEL-NEXT: v_cmp_ge_f32_e32 vcc, v0, v2
 ; SI-GISEL-NEXT: v_mov_b32_e32 v2, 0x42b17218
-; SI-GISEL-NEXT: v_cndmask_b32_e64 v1, v1, 0, vcc
-; SI-GISEL-NEXT: v_cmp_gt_f32_e32 vcc, v0, v2
-; SI-GISEL-NEXT: v_cndmask_b32_e32 v0, v1, v3, vcc
+; SI-GISEL-NEXT: v_cndmask_b32_e32 v1, 0, v1, vcc
+; SI-GISEL-NEXT: v_cmp_le_f32_e32 vcc, v0, v2
+; SI-GISEL-NEXT: v_cndmask_b32_e32 v0, v3, v1, vcc
 ; SI-GISEL-NEXT: s_setpc_b64 s[30:31]
 ;
 ; R600-LABEL: v_exp_f32_contract:
@@ -7190,11 +7190,11 @@ define float @v_exp_f32_contract_daz(float %in) #0 {
 ; VI-GISEL-NEXT: v_mov_b32_e32 v3, 0x7f800000
 ; VI-GISEL-NEXT: v_ldexp_f32 v1, v1, v2
 ; VI-GISEL-NEXT: v_mov_b32_e32 v2, 0xc2ce8ed0
-; VI-GISEL-NEXT: v_cmp_lt_f32_e32 vcc, v0, v2
+; VI-GISEL-NEXT: v_cmp_ge_f32_e32 vcc, v0, v2
 ; VI-GISEL-NEXT: v_mov_b32_e32 v2, 0x42b17218
-; VI-GISEL-NEXT: v_cndmask_b32_e64 v1, v1, 0, vcc
-; VI-GISEL-NEXT: v_cmp_gt_f32_e32 vcc, v0, v2
-; VI-GISEL-NEXT: v_cndmask_b32_e32 v0, v1, v3, vcc
+; VI-GISEL-NEXT: v_cndmask_b32_e32 v1, 0, v1, vcc
+; VI-GISEL-NEXT: v_cmp_le_f32_e32 vcc, v0, v2
+; VI-GISEL-NEXT: v_cndmask_b32_e32 v0, v3, v1, vcc
 ; VI-GISEL-NEXT: s_setpc_b64 s[30:31]
 ;
 ; GFX900-SDAG-LABEL: v_exp_f32_contract_daz:
@@ -7236,11 +7236,11 @@ define float @v_exp_f32_contract_daz(float %in) #0 {
 ; GFX900-GISEL-NEXT: v_mov_b32_e32 v3, 0x7f800000
 ; GFX900-GISEL-NEXT: v_ldexp_f32 v1, v1, v2
 ; GFX900-GISEL-NEXT: v_mov_b32_e32 v2, 0xc2ce8ed0
-; GFX900-GISEL-NEXT: v_cmp_lt_f32_e32 vcc, v0, v2
+; GFX900-GISEL-NEXT: v_cmp_ge_f32_e32 vcc, v0, v2
 ; GFX900-GISEL-NEXT: v_mov_b32_e32 v2, 0x42b17218
-; GFX900-GISEL-NEXT: v_cndmask_b32_e64 v1, v1, 0, vcc
-; GFX900-GISEL-NEXT: v_cmp_gt_f32_e32 vcc, v0, v2
-; GFX900-GISEL-NEXT: v_cndmask_b32_e32 v0, v1, v3, vcc
+; GFX900-GISEL-NEXT: v_cndmask_b32_e32 v1, 0, v1, vcc
+; GFX900-GISEL-NEXT: v_cmp_le_f32_e32 vcc, v0, v2
+; GFX900-GISEL-NEXT: v_cndmask_b32_e32 v0, v3, v1, vcc
 ; GFX900-GISEL-NEXT: s_setpc_b64 s[30:31]
 ;
 ; SI-SDAG-LABEL: v_exp_f32_contract_daz:
@@ -7282,11 +7282,11 @@ define float @v_exp_f32_contract_daz(float %in) #0 {
 ; SI-GISEL-NEXT: v_mov_b32_e32 v3, 0x7f800000
 ; SI-GISEL-NEXT: v_ldexp_f32_e32 v1, v1, v2
 ; SI-GISEL-NEXT: v_mov_b32_e32 v2, 0xc2ce8ed0
-; SI-GISEL-NEXT: v_cmp_lt_f32_e32 vcc, v0, v2
+; SI-GISEL-NEXT: v_cmp_ge_f32_e32 vcc, v0, v2
 ; SI-GISEL-NEXT: v_mov_b32_e32 v2, 0x42b17218
-; SI-GISEL-NEXT: v_cndmask_b32_e64 v1, v1, 0, vcc
-; SI-GISEL-NEXT: v_cmp_gt_f32_e32 vcc, v0, v2
-; SI-GISEL-NEXT: v_cndmask_b32_e32 v0, v1, v3, vcc
+; SI-GISEL-NEXT: v_cndmask_b32_e32 v1, 0, v1, vcc
+; SI-GISEL-NEXT: v_cmp_le_f32_e32 vcc, v0, v2
+; SI-GISEL-NEXT: v_cndmask_b32_e32 v0, v3, v1, vcc
 ; SI-GISEL-NEXT: s_setpc_b64 s[30:31]
 ;
 ; R600-LABEL: v_exp_f32_contract_daz:
@@ -7343,8 +7343,8 @@ define float @v_exp_f32_contract_nnan_ninf(float %in) {
 ; VI-GISEL-NEXT: v_exp_f32_e32 v1, v1
 ; VI-GISEL-NEXT: v_ldexp_f32 v1, v1, v2
 ; VI-GISEL-NEXT: v_mov_b32_e32 v2, 0xc2ce8ed0
-; VI-GISEL-NEXT: v_cmp_lt_f32_e32 vcc, v0, v2
-; VI-GISEL-NEXT: v_cndmask_b32_e64 v0, v1, 0, vcc
+; VI-GISEL-NEXT: v_cmp_ge_f32_e32 vcc, v0, v2
+; VI-GISEL-NEXT: v_cndmask_b32_e32 v0, 0, v1, vcc
 ; VI-GISEL-NEXT: s_setpc_b64 s[30:31]
 ;
 ; GFX900-SDAG-LABEL: v_exp_f32_contract_nnan_ninf:
@@ -7381,8 +7381,8 @@ define float @v_exp_f32_contract_nnan_ninf(float %in) {
 ; GFX900-GISEL-NEXT: v_exp_f32_e32 v1, v1
 ; GFX900-GISEL-NEXT: v_ldexp_f32 v1, v1, v2
 ; GFX900-GISEL-NEXT: v_mov_b32_e32 v2, 0xc2ce8ed0
-; GFX900-GISEL-NEXT: v_cmp_lt_f32_e32 vcc, v0, v2
-; GFX900-GISEL-NEXT: v_cndmask_b32_e64 v0, v1, 0, vcc
+; GFX900-GISEL-NEXT: v_cmp_ge_f32_e32 vcc, v0, v2
+; GFX900-GISEL-NEXT: v_cndmask_b32_e32 v0, 0, v1, vcc
 ; GFX900-GISEL-NEXT: s_setpc_b64 s[30:31]
 ;
 ; SI-SDAG-LABEL: v_exp_f32_contract_nnan_ninf:
@@ -7419,8 +7419,8 @@ define float @v_exp_f32_contract_nnan_ninf(float %in) {
 ; SI-GISEL-NEXT: v_exp_f32_e32 v1, v1
 ; SI-GISEL-NEXT: v_ldexp_f32_e32 v1, v1, v2
 ; SI-GISEL-NEXT: v_mov_b32_e32 v2, 0xc2ce8ed0
-; SI-GISEL-NEXT: v_cmp_lt_f32_e32 vcc, v0, v2
-; SI-GISEL-NEXT: v_cndmask_b32_e64 v0, v1, 0, vcc
+; SI-GISEL-NEXT: v_cmp_ge_f32_e32 vcc, v0, v2
+; SI-GISEL-NEXT: v_cndmask_b32_e32 v0, 0, v1, vcc
 ; SI-GISEL-NEXT: s_setpc_b64 s[30:31]
 ;
 ; R600-LABEL: v_exp_f32_contract_nnan_ninf:
diff --git a/llvm/test/CodeGen/AMDGPU/llvm.exp10.ll b/llvm/test/CodeGen/AMDGPU/llvm.exp10.ll
index 0c2e6f82c9115..35040d479f71d 100644
--- a/llvm/test/CodeGen/AMDGPU/llvm.exp10.ll
+++ b/llvm/test/CodeGen/AMDGPU/llvm.exp10.ll
@@ -71,11 +71,11 @@ define amdgpu_kernel void @s_exp10_f32(ptr addrspace(1) %out, float %in) {
 ; VI-GISEL-NEXT: v_mov_b32_e32 v2, 0x7f800000
 ; VI-GISEL-NEXT: v_ldexp_f32 v0, v0, v1
 ; VI-GISEL-NEXT: v_mov_b32_e32 v1, 0xc23369f4
-; VI-GISEL-NEXT: v_cmp_lt_f32_e32 vcc, s2, v1
+; VI-GISEL-NEXT: v_cmp_ge_f32_e32 vcc, s2, v1
 ; VI-GISEL-NEXT: v_mov_b32_e32 v1, 0x421a209b
-; VI-GISEL-NEXT: v_cndmask_b32_e64 v0, v0, 0, vcc
-; VI-GISEL-NEXT: v_cmp_gt_f32_e32 vcc, s2, v1
-; VI-GISEL-NEXT: v_cndmask_b32_e32 v2, v0, v2, vcc
+; VI-GISEL-NEXT: v_cndmask_b32_e32 v0, 0, v0, vcc
+; VI-GISEL-NEXT: v_cmp_le_f32_e32 vcc, s2, v1
+; VI-GISEL-NEXT: v_cndmask_b32_e32 v2, v2, v0, vcc
 ; VI-GISEL-NEXT: s_waitcnt lgkmcnt(0)
 ; VI-GISEL-NEXT: v_mov_b32_e32 v0, s0
 ; VI-GISEL-NEXT: v_mov_b32_e32 v1, s1
@@ -125,13 +125,13 @@ define amdgpu_kernel void @s_exp10_f32(ptr addrspace(1) %out, float %in) {
 ; GFX900-GISEL-NEXT: v_cvt_i32_f32_e32 v1, v3
 ; GFX900-GISEL-NEXT: v_exp_f32_e32 v0, v0
 ; GFX900-GISEL-NEXT: v_mov_b32_e32 v2, 0xc23369f4
-; GFX900-GISEL-NEXT: v_cmp_lt_f32_e32 vcc, s2, v2
+; GFX900-GISEL-NEXT: v_cmp_ge_f32_e32 vcc, s2, v2
 ; GFX900-GISEL-NEXT: v_mov_b32_e32 v2, 0x7f800000
 ; GFX900-GISEL-NEXT: v_ldexp_f32 v0, v0, v1
 ; GFX900-GISEL-NEXT: v_mov_b32_e32 v1, 0x421a209b
-; GFX900-GISEL-NEXT: v_cndmask_b32_e64 v0, v0, 0, vcc
-; GFX900-GISEL-NEXT: v_cmp_gt_f32_e32 vcc, s2, v1
-; GFX900-GISEL-NEXT: v_cndmask_b32_e32 v0, v0, v2, vcc
+; GFX900-GISEL-NEXT: v_cndmask_b32_e32 v0, 0, v0, vcc
+; GFX900-GISEL-NEXT: v_cmp_le_f32_e32 vcc, s2, v1
+; GFX900-GISEL-NEXT: v_cndmask_b32_e32 v0, v2, v0, vcc
 ; GFX900-GISEL-NEXT: v_mov_b32_e32 v1, 0
 ; GFX900-GISEL-NEXT: global_store_dword v1, v0, s[0:1]
 ; GFX900-GISEL-NEXT: s_endpgm
@@ -181,13 +181,13 @@ define amdgpu_kernel void @s_exp10_f32(ptr addrspace(1) %out, float %in) {
 ; SI-GISEL-NEXT: v_cvt_i32_f32_e32 v1, v3
 ; SI-GISEL-NEXT: v_exp_f32_e32 v0, v0
 ; SI-GISEL-NEXT: v_mov_b32_e32 v2, 0xc23369f4
-; SI-GISEL-NEXT: v_cmp_lt_f32_e32 vcc, s2, v2
+; SI-GISEL-NEXT: v_cmp_ge_f32_e32 vcc, s2, v2
 ; SI-GISEL-NEXT: v_mov_b32_e32 v2, 0x7f800000
 ; SI-GISEL-NEXT: v_ldexp_f32_e32 v0, v0, v1
 ; SI-GISEL-NEXT: v_mov_b32_e32 v1, 0x421a209b
-; SI-GISEL-NEXT: v_cndmask_b32_e64 v0, v0, 0, vcc
-; SI-GISEL-NEXT: v_cmp_gt_f32_e32 vcc, s2, v1
-; SI-GISEL-NEXT: v_cndmask_b32_e32 v0, v0, v2, vcc
+; SI-GISEL-NEXT: v_cndmask_b32_e32 v0, 0, v0, vcc
+; SI-GISEL-NEXT: v_cmp_le_f32_e32 vcc, s2, v1
+; SI-GISEL-NEXT: v_cndmask_b32_e32 v0, v2, v0, vcc
 ; SI-GISEL-NEXT: s_mov_b32 s2, -1
 ; SI-GISEL-NEXT: buffer_store_dword v0, off, s[0:3], 0
 ; SI-GISEL-NEXT: s_endpgm
@@ -426,18 +426,18 @@ define amdgpu_kernel void @s_exp10_v2f32(ptr addrspace(1) %out, <2 x float> %in)
 ; VI-GISEL-NEXT: v_exp_f32_e32 v5, v0
 ; VI-GISEL-NEXT: v_ldexp_f32 v2, v2, v3
 ; VI-GISEL-NEXT: v_mov_b32_e32 v3, 0xc23369f4
-; VI-GISEL-NEXT: v_cmp_lt_f32_e32 vcc, s2, v3
+; VI-GISEL-NEXT: v_cmp_ge_f32_e32 vcc, s2, v3
 ; VI-GISEL-NEXT: v_mov_b32_e32 v4, 0x421a209b
-; VI-GISEL-NEXT: v_cndmask_b32_e64 v2, v2, 0, vcc
+; VI-GISEL-NEXT: v_cndmask_b32_e32 v2, 0, v2, vcc
 ; VI-GISEL-NEXT: v_mov_b32_e32 v6, 0x7f800000
-; VI-GISEL-NEXT: v_cmp_gt_f32_e32 vcc, s2, v4
-; VI-GISEL-NEXT: v_cndmask_b32_e32 v0, v2, v6, vcc
+; VI-GISEL-NEXT: v_cmp_le_f32_e32 vcc, s2, v4
+; VI-GISEL-NEXT: v_cndmask_b32_e32 v0, v6, v2, vcc
 ; VI-GISEL-NEXT: v_ldexp_f32 v1, v5, v1
-; VI-GISEL-NEXT: v_cmp_lt_f32_e32 vcc, s3, v3
-; VI-GISEL-NEXT: v_cndmask_b32_e64 v1, v1, 0, vcc
-; VI-GISEL-NEXT: v_cmp_gt_f32_e32 vcc, s3, v4
+; VI-GISEL-NEXT: v_cmp_ge_f32_e32 vcc, s3, v3
+; VI-GISEL-NEXT: v_cndmask_b32_e32 v1, 0, v1, vcc
+; VI-GISEL-NEXT: v_cmp_le_f32_e32 vcc, s3, v4
 ; VI-GISEL-NEXT: v_mov_b32_e32 v3, s1
-; VI-GISEL-NEXT: v_cndmask_b32_e32 v1, v1, v6, vcc
+; VI-GISEL-NEXT: v_cndmask_b32_e32 v1, v6, v1, vcc
 ; VI-GISEL-NEXT: v_mov_b32_e32 v2, s0
 ; VI-GISEL-NEXT: flat_store_dwordx2 v[2:3], v[0:1]
 ; VI-GISEL-NEXT: s_endpgm
@@ -506,16 +506,16 @@ define amdgpu_kernel void @s_exp10_v2f32(ptr addrspace(1) %out, <2 x float> %in)
 ; GFX900-GISEL-NEXT: v_exp_f32_e32 v5, v0
 ; GFX900-GISEL-NEXT: v_mov_b32_e32 v4, 0xc23369f4
 ; GFX900-GISEL-NEXT: v_ldexp_f32 v2, v2, v3
-; GFX900-GISEL-NEXT: v_cmp_lt_f32_e32 vcc, s2, v4
+; GFX900-GISEL-NEXT: v_cmp_ge_f32_e32 vcc, s2, v4
 ; GFX900-GISEL-NEXT: v_mov_b32_e32 v3, 0x421a209b
-; GFX900-GISEL-NEXT: v_cndmask_b32_e64 v2, v2, 0, vcc
-; GFX900-GISEL-NEXT: v_cmp_gt_f32_e32 vcc, s2, v3
-; GFX900-GISEL-NEXT: v_cndmask_b32_e32 v0, v2, v6, vcc
+; GFX900-GISEL-NEXT: v_cndmask_b32_e32 v2, 0, v2, vcc
+; GFX900-GISEL-NEXT: v_cmp_le_f32_e32 vcc, s2, v3
+; GFX900-GISEL-NEXT: v_cndmask_b32_e32 v0, v6, v2, vcc
 ; GFX900-GISEL-NEXT: v_ldexp_f32 v1, v5, v1
-; GFX900-GISEL-NEXT: v_cmp_lt_f32_e32 vcc, s3, v4
-; GFX900-GISEL-NEXT: v_cndmask_b32_e64 v1, v1, 0, vcc
-; GFX900-GISEL-NEXT: v_cmp_gt_f32_e32 vcc, s3, v3
-; GFX900-GISEL-NEXT: v_cndmask_b32_e32 v1, v1, v6, vcc
+; GFX900-GISEL-NEXT: v_cmp_ge_f32_e32 vcc, s3, v4
+; GFX900-GISEL-NEXT: v_cndmask_b32_e32 v1, 0, v1, vcc
+; GFX900-GISEL-NEXT: v_cmp_le_f32_e32 vcc, s3, v3
+; GFX900-GISEL-NEXT: v_cndmask_b32_e32 v1, v6, v1, vcc
 ; GFX900-GISEL-NEXT: v_mov_b32_e32 v2, 0
 ; GFX900-GISEL-NEXT: global_store_dwordx2 v2, v[0:1], s[0:1]
 ; GFX900-GISEL-NEXT: s_endpgm
@@ -587,16 +587,16 @@ define amdgpu_kernel void @s_exp10_v2f32(ptr addrspace(1) %out, <2 x float> %in)
 ; SI-GISEL-NEXT: v_exp_f32_e32 v5, v0
 ; SI-GISEL-NEXT: v_mov_b32_e32 v4, 0xc23369f4
 ; SI-GISEL-NEXT: v_ldexp_f32_e32 v2, v2, v3
-; SI-GISEL-NEXT: v_cmp_lt_f32_e32 vcc, s2, v4
+; SI-GISEL-NEXT: v_cmp_ge_f32_e32 vcc, s2, v4
 ; SI-GISEL-NEXT: v_mov_b32_e32 v3, 0x421a209b
-; SI-GISEL-NEXT: v_cndmask_b32_e64 v2, v2, 0, vcc
-; SI-GISEL-NEXT: v_cmp_gt_f32_e32 vcc, s2, v3
-; SI-GISEL-NEXT: v_cndmask_b32_e32 v0, v2, v6, vcc
+; SI-GISEL-NEXT: v_cndmask_b32_e32 v2, 0, v2, vcc
+; SI-GISEL-NEXT: v_cmp_le_f32_e32 vcc, s2, v3
+; SI-GISEL-NEXT: v_cndmask_b32_e32 v0, v6, v2, vcc
 ; SI-GISEL-NEXT: v_ldexp_f32_e32 v1, v5, v1
-; SI-GISEL-NEXT: v_cmp_lt_f32_e32 vcc, s3, v4
-; SI-GISEL-NEXT: v_cndmask_b32_e64 v1, v1, 0, vcc
-; SI-GISEL-NEXT: v_cmp_gt_f32_e32 vcc, s3, v3
-; SI-GISEL-NEXT: v_cndmask_b32_e32 v1, v1, v6, vcc
+; SI-GISEL-NEXT: v_cmp_ge_f32_e32 vcc, s3, v4
+; SI-GISEL-NEXT: v_cndmask_b32_e32 v1, 0, v1, vcc
+; SI-GISEL-NEXT: v_cmp_le_f32_e32 vcc, s3, v3
+; SI-GISEL-NEXT: v_cndmask_b32_e32 v1, v6, v1, vcc
 ; SI-GISEL-NEXT: s_mov_b32 s2, -1
 ; SI-GISEL-NEXT: s_mov_b32 s3, 0xf000
 ; SI-GISEL-NEXT: buffer_store_dwordx2 v[0:1], off, s[0:3], 0
@@ -962,10 +962,10 @@ define amdgpu_kernel void @s_exp10_v3f32(ptr addrspace(1) %out, <3 x float> %in)
 ; VI-GISEL-NEXT: v_exp_f32_e32 v5, v5
 ; VI-GISEL-NEXT: v_ldexp_f32 v0, v0, v3
 ; VI-GISEL-NEXT: v_mov_b32_e32 v3, 0xc23369f4
-; VI-GISEL-NEXT: v_cmp_lt_f32_e32 vcc, s0, v3
+; VI-GISEL-NEXT: v_cmp_ge_f32_e32 vcc, s0, v3
 ; VI-GISEL-NEXT: v_mov_b32_e32 v4, 0x421a209b
-; VI-GISEL-NEXT: v_cndmask_b32_e64 v0, v0, 0, vcc
-; VI-GISEL-NEXT: v_cmp_gt_f32_e32 vcc, s0, v4
+; VI-GISEL-NEXT: v_cndmask_b32_e32 v0, 0, v0, vcc
+; VI-GISEL-NEXT: v_cmp_le_f32_e32 vcc, s0, v4
 ; VI-GISEL-NEXT: s_and_b32 s0, s2, 0xfffff000
 ; VI-GISEL-NEXT: v_ldexp_f32 v5, v5, v6
 ; VI-GISEL-NEXT: v_mov_b32_e32 v6, s0
@@ -982,17 +982,17 @@ define amdgpu_kernel void @s_exp10_v3f32(ptr addrspace(1) %out, <3 x float> %in)
 ; VI-GISEL-NEXT: v_cvt_i32_f32_e32 v2, v6
 ; VI-GISEL-NEXT: v_exp_f32_e32 v6, v1
 ; VI-GISEL-NEXT: v_mov_b32_e32 v7, 0x7f800000
-; VI-GISEL-NEXT: v_cndmask_b32_e32 v0, v0, v7, vcc
-; VI-GISEL-NEXT: v_cmp_lt_f32_e32 vcc, s1, v3
-; VI-GISEL-NEXT: v_cndmask_b32_e64 v1, v5, 0, vcc
-; VI-GISEL-NEXT: v_cmp_gt_f32_e32 vcc, s1, v4
-; VI-GISEL-NEXT: v_cndmask_b32_e32 v1, v1, v7, vcc
+; VI-GISEL-NEXT: v_cndmask_b32_e32 v0, v7, v0, vcc
+; VI-GISEL-NEXT: v_cmp_ge_f32_e32 vcc, s1, v3
+; VI-GISEL-NEXT: v_cndmask_b32_e32 v1, 0, v5, vcc
+; VI-GISEL-NEXT: v_cmp_le_f32_e32 vcc, s1, v4
+; VI-GISEL-NEXT: v_cndmask_b32_e32 v1, v7, v1, vcc
 ; VI-GISEL-NEXT: v_ldexp_f32 v2, v6, v2
-; VI-GISEL-NEXT: v_cmp_lt_f32_e32 vcc, s2, v3
-; VI-GISEL-NEXT: v_cndmask_b32_e64 v2, v2, 0, vcc
-; VI-GISEL-NEXT: v_cmp_gt_f32_e32 vcc, s2, v4
+; VI-GISEL-NEXT: v_cmp_ge_f32_e32 vcc, s2, v3
+; VI-GISEL-NEXT: v_cndmask_b32_e32 v2, 0, v2, vcc
+; VI-GISEL-NEXT: v_cmp_le_f32_e32 vcc, s2, v4
 ; VI-GISEL-NEXT: v_mov_b32_e32 v3, s4
-; VI-GISEL-NEXT: v_cndmask_b32_e32 v2, v2, v7, vcc
+; VI-GISEL-NEXT: v_cndmask_b32_e32 v2, v7, v2, vcc
 ; VI-GISEL-NEXT: v_mov_b32_e32 v4, s5
 ; VI-GISEL-NEXT: flat_store_dwordx3 v[3:4], v[0:2]
 ; VI-GISEL-NEXT: s_endpgm
@@ -1082,23 +1082,23 @@ define amdgpu_kernel void @s_exp10_v3f32(ptr addrspace(1) %out, <3 x float> %in)
 ; GFX900-GISEL-NEXT: v_mov_b32_e32 v4, 0xc23369f4
 ; GFX900-GISEL-NEXT: v_add_f32_e32 v1, v6, v1
 ; GFX900-GISEL-NEXT: v_ldexp_f32 v0, v0, v3
-; GFX900-GISEL-NEXT: v_cmp_lt_f32_e32 vcc, s0, v4
+; GFX900-GISEL-NEXT: v_cmp_ge_f32_e32 vcc, s0, v4
 ; GFX900-GISEL-NEXT: v_mov_b32_e32 v3, 0x421a209b
 ; GFX900-GISEL-NEXT: v_cvt_i32_f32_e32 v2, v2
 ; GFX900-GISEL-NEXT: v_exp_f32_e32 v6, v1
-; GFX900-GISEL-NEXT: v_cndmask_b32_e64 v0, v0, 0, vcc
+; GFX900-GISEL-NEXT: v_cndmask_b32_e32 v0, 0, v0, vcc
 ; GFX900-GISEL-NEXT: v_mov_b32_e32 v7, 0x7f800000
-; GFX900-GISEL-NEXT: v_cmp_gt_f32_e32 vcc, s0, v3
-; GFX900-GISEL-NEXT: v_cndmask_b32_e32 v0, v0, v7, vcc
-; GFX900-GISEL-NEXT: v_cmp_lt_f32_e32 vcc, s1, v4
-; GFX900-GISEL-NEXT: v_cndmask_b32_e64 v1, v5, 0, vcc
-; GFX900-GISEL-NEXT: v_cmp_gt_f32_e32 vcc, s1, v3
-; GFX900-GISEL-NEXT: v_cndmask_b32_e32 v1, v1, v7, vcc
+; GFX900-GISEL-NEXT: v_cmp_le_f32_e32 vcc, s0, v3
+; GFX900-GISEL-NEXT: v_cndmask_b32_e32 v0, v7, v0, vcc
+; GFX900-GISEL-NEXT: v_cmp_ge_f32_e32 vcc, s1, v4
+; GFX900-GISEL-NEXT: v_cndmask_b32_e32 v1, 0, v5, vcc
+; GFX900-GISEL-NEXT: v_cmp_le_f32_e32 vcc, s1, v3
+; GFX900-GISEL-NEXT: v_cndmask_b32_e32 v1, v7, v1, vcc
 ; GFX900-GISEL-NEXT: v_ldexp_f32 v2, v6, v2
-; GFX900-GISEL-NEXT: v_cmp_lt_f32_e32 vcc, s2, v4
-; GFX900-GISEL-NEXT: v_cndmask_b32_e64 v2, v2, 0, vcc
-; GFX900-GISEL-NEXT: v_cmp_gt_f32_e32 vcc, s2, v3
-; GFX900-GISEL-NEXT: v_cndmask_b32_e32 v2, v2, v7, vcc
+; GFX900-GISEL-NEXT: v_cmp_ge_f32_e32 vcc, s2, v4
+; GFX900-GISEL-NEXT: v_cndmask_b32_e32 v2, 0, v2, vcc
+; GFX900-GISEL-NEXT: v_cmp_le_f32_e32 vcc, s2, v3
+; GFX900-GISEL-NEXT: v_cndmask_b32_e32 v2, v7, v2, vcc
 ; GFX900-GISEL-NEXT: v_mov_b32_e32 v3, 0
 ; GFX900-GISEL-NEXT: global_store_dwordx3 v3, v[0:2], s[6:7]
 ; GFX900-GISEL-NEXT: s_endpgm
@@ -1191,24 +1191,24 @@ define amdgpu_kernel void @s_exp10_v3f32(ptr addrspace(1) %out, <3 x float> %in)
 ; SI-GISEL-NEXT: v_mov_b32_e32 v4, 0xc23369f4
 ; SI-GISEL-NEXT: v_add_f32_e32 v1, v6, v1
 ; SI-GISEL-NEXT: v_ldexp_f32_e32 v0, v0, v3
-; SI-GISEL-NEXT: v_cmp_lt_f32_e32 vcc, s0, v4
+; SI-GISEL-NEXT: v_cmp_ge_f32_e32 vcc, s0, v4
 ; SI-GISEL-NEXT: v_mov_b32_e32 v3, 0x421a209b
 ; SI-GISEL-NEXT: v_cvt_i32_f32_e32 v2, v2
 ; SI-GISEL-NEXT: v_exp_f32_e32 v6, v1
-; SI-GISEL-NEXT: v_cndmask_b32_e64 v0, v0, 0, vcc
+; SI-GISEL-NEXT: v_cndmask_b32_e32 v0, 0, v0, vcc
 ; SI-GISEL-NEXT: v_mov_b32_e32 v7, 0x7f800000
-; SI-GISEL-NEXT: v_cmp_gt_f32_e32 vcc, s0, v3
-; SI-GISEL-NEXT: v_cndmask_b32_e32 v0, v0, v7, vcc
-; SI-GISEL-NEXT: v_cmp_lt_f32_e32 vcc, s1, v4
-; SI-GISEL-NEXT: v_cndmask_b32_e64 v1, v5, 0, vcc
-; SI-GISEL-NEXT: v_cmp_gt_f32_e32 vcc, s1, v3
-; SI-GISEL-NEXT: v_cndmask_b32_e32 v1, v1, v7, vcc
+; SI-GISEL-NEXT: v_cmp_le_f32_e32 vcc, s0, v3
+; SI-GISEL-NEXT: v_cndmask_b32_e32 v0, v7, v0, vcc
+; SI-GISEL-NEXT: v_cmp_ge_f32_e32 vcc, s1, v4
+; SI-GISEL-NEXT: v_cndmask_b32_e32 v1, 0, v5, vcc
+; SI-GISEL-NEXT: v_cmp_le_f32_e32 vcc, s1, v3
+; SI-GISEL-NEXT: v_cndmask_b32_e32 v1, v7, v1, vcc
 ; SI-GISEL-NEXT: v_ldexp_f32_e32 v2, v6, v2
-; SI-GISEL-NEXT: v_cmp_lt_f32_e32 vcc, s2, v4
-; SI-GISEL-NEXT: v_cndmask_b32_e64 v2, v2, 0, vcc
-; SI-GISEL-NEXT: v_cmp_gt_f32_e32 vcc, s2, v3
+; SI-GISEL-NEXT: v_cmp_ge_f32_e32 vcc, s2, v4
+; SI-GISEL-NEXT: v_cndmask_b32_e32 v2, 0, v2, vcc
+; 
SI-GISEL-NEXT: v_cmp_le_f32_e32 vcc, s2, v3 ; SI-GISEL-NEXT: s_mov_b32 s7, 0xf000 -; SI-GISEL-NEXT: v_cndmask_b32_e32 v2, v2, v7, vcc +; SI-GISEL-NEXT: v_cndmask_b32_e32 v2, v7, v2, vcc ; SI-GISEL-NEXT: buffer_store_dwordx2 v[0:1], off, s[4:7], 0 ; SI-GISEL-NEXT: buffer_store_dword v2, off, s[4:7], 0 offset:8 ; SI-GISEL-NEXT: s_endpgm @@ -1721,9 +1721,9 @@ define amdgpu_kernel void @s_exp10_v4f32(ptr addrspace(1) %out, <4 x float> %in) ; VI-GISEL-NEXT: v_add_f32_e32 v1, v6, v1 ; VI-GISEL-NEXT: v_cvt_i32_f32_e32 v6, v7 ; VI-GISEL-NEXT: v_exp_f32_e32 v1, v1 -; VI-GISEL-NEXT: v_cmp_lt_f32_e32 vcc, s0, v4 -; VI-GISEL-NEXT: v_cndmask_b32_e64 v0, v0, 0, vcc -; VI-GISEL-NEXT: v_cmp_gt_f32_e32 vcc, s0, v5 +; VI-GISEL-NEXT: v_cmp_ge_f32_e32 vcc, s0, v4 +; VI-GISEL-NEXT: v_cndmask_b32_e32 v0, 0, v0, vcc +; VI-GISEL-NEXT: v_cmp_le_f32_e32 vcc, s0, v5 ; VI-GISEL-NEXT: s_and_b32 s0, s2, 0xfffff000 ; VI-GISEL-NEXT: v_ldexp_f32 v1, v1, v6 ; VI-GISEL-NEXT: v_mov_b32_e32 v6, s0 @@ -1753,23 +1753,23 @@ define amdgpu_kernel void @s_exp10_v4f32(ptr addrspace(1) %out, <4 x float> %in) ; VI-GISEL-NEXT: v_sub_f32_e32 v2, v2, v8 ; VI-GISEL-NEXT: v_mov_b32_e32 v7, 0x7f800000 ; VI-GISEL-NEXT: v_add_f32_e32 v2, v2, v3 -; VI-GISEL-NEXT: v_cndmask_b32_e32 v0, v0, v7, vcc -; VI-GISEL-NEXT: v_cmp_lt_f32_e32 vcc, s1, v4 +; VI-GISEL-NEXT: v_cndmask_b32_e32 v0, v7, v0, vcc +; VI-GISEL-NEXT: v_cmp_ge_f32_e32 vcc, s1, v4 ; VI-GISEL-NEXT: v_cvt_i32_f32_e32 v3, v8 ; VI-GISEL-NEXT: v_exp_f32_e32 v8, v2 -; VI-GISEL-NEXT: v_cndmask_b32_e64 v1, v1, 0, vcc -; VI-GISEL-NEXT: v_cmp_gt_f32_e32 vcc, s1, v5 -; VI-GISEL-NEXT: v_cndmask_b32_e32 v1, v1, v7, vcc -; VI-GISEL-NEXT: v_cmp_lt_f32_e32 vcc, s2, v4 -; VI-GISEL-NEXT: v_cndmask_b32_e64 v2, v6, 0, vcc -; VI-GISEL-NEXT: v_cmp_gt_f32_e32 vcc, s2, v5 -; VI-GISEL-NEXT: v_cndmask_b32_e32 v2, v2, v7, vcc +; VI-GISEL-NEXT: v_cndmask_b32_e32 v1, 0, v1, vcc +; VI-GISEL-NEXT: v_cmp_le_f32_e32 vcc, s1, v5 +; VI-GISEL-NEXT: v_cndmask_b32_e32 v1, v7, v1, vcc +; VI-GISEL-NEXT: v_cmp_ge_f32_e32 vcc, s2, v4 +; VI-GISEL-NEXT: v_cndmask_b32_e32 v2, 0, v6, vcc +; VI-GISEL-NEXT: v_cmp_le_f32_e32 vcc, s2, v5 +; VI-GISEL-NEXT: v_cndmask_b32_e32 v2, v7, v2, vcc ; VI-GISEL-NEXT: v_ldexp_f32 v3, v8, v3 -; VI-GISEL-NEXT: v_cmp_lt_f32_e32 vcc, s3, v4 -; VI-GISEL-NEXT: v_cndmask_b32_e64 v3, v3, 0, vcc -; VI-GISEL-NEXT: v_cmp_gt_f32_e32 vcc, s3, v5 +; VI-GISEL-NEXT: v_cmp_ge_f32_e32 vcc, s3, v4 +; VI-GISEL-NEXT: v_cndmask_b32_e32 v3, 0, v3, vcc +; VI-GISEL-NEXT: v_cmp_le_f32_e32 vcc, s3, v5 ; VI-GISEL-NEXT: v_mov_b32_e32 v4, s4 -; VI-GISEL-NEXT: v_cndmask_b32_e32 v3, v3, v7, vcc +; VI-GISEL-NEXT: v_cndmask_b32_e32 v3, v7, v3, vcc ; VI-GISEL-NEXT: v_mov_b32_e32 v5, s5 ; VI-GISEL-NEXT: flat_store_dwordx4 v[4:5], v[0:3] ; VI-GISEL-NEXT: s_endpgm @@ -1857,7 +1857,7 @@ define amdgpu_kernel void @s_exp10_v4f32(ptr addrspace(1) %out, <4 x float> %in) ; GFX900-GISEL-NEXT: v_cvt_i32_f32_e32 v1, v4 ; GFX900-GISEL-NEXT: v_exp_f32_e32 v0, v0 ; GFX900-GISEL-NEXT: v_mov_b32_e32 v4, 0xc23369f4 -; GFX900-GISEL-NEXT: v_cmp_lt_f32_e32 vcc, s0, v4 +; GFX900-GISEL-NEXT: v_cmp_ge_f32_e32 vcc, s0, v4 ; GFX900-GISEL-NEXT: v_ldexp_f32 v0, v0, v1 ; GFX900-GISEL-NEXT: v_mul_f32_e32 v1, s1, v2 ; GFX900-GISEL-NEXT: v_fma_f32 v6, s1, v2, -v1 @@ -1867,9 +1867,9 @@ define amdgpu_kernel void @s_exp10_v4f32(ptr addrspace(1) %out, <4 x float> %in) ; GFX900-GISEL-NEXT: v_add_f32_e32 v1, v1, v6 ; GFX900-GISEL-NEXT: v_cvt_i32_f32_e32 v6, v7 ; GFX900-GISEL-NEXT: v_exp_f32_e32 v1, v1 -; GFX900-GISEL-NEXT: v_cndmask_b32_e64 v0, v0, 0, vcc +; 
GFX900-GISEL-NEXT: v_cndmask_b32_e32 v0, 0, v0, vcc ; GFX900-GISEL-NEXT: v_mov_b32_e32 v7, 0x7f800000 -; GFX900-GISEL-NEXT: v_cmp_gt_f32_e32 vcc, s0, v5 +; GFX900-GISEL-NEXT: v_cmp_le_f32_e32 vcc, s0, v5 ; GFX900-GISEL-NEXT: v_ldexp_f32 v1, v1, v6 ; GFX900-GISEL-NEXT: v_mul_f32_e32 v6, s2, v2 ; GFX900-GISEL-NEXT: v_fma_f32 v8, s2, v2, -v6 @@ -1879,9 +1879,9 @@ define amdgpu_kernel void @s_exp10_v4f32(ptr addrspace(1) %out, <4 x float> %in) ; GFX900-GISEL-NEXT: v_add_f32_e32 v6, v6, v8 ; GFX900-GISEL-NEXT: v_cvt_i32_f32_e32 v8, v9 ; GFX900-GISEL-NEXT: v_exp_f32_e32 v6, v6 -; GFX900-GISEL-NEXT: v_cndmask_b32_e32 v0, v0, v7, vcc -; GFX900-GISEL-NEXT: v_cmp_lt_f32_e32 vcc, s1, v4 -; GFX900-GISEL-NEXT: v_cndmask_b32_e64 v1, v1, 0, vcc +; GFX900-GISEL-NEXT: v_cndmask_b32_e32 v0, v7, v0, vcc +; GFX900-GISEL-NEXT: v_cmp_ge_f32_e32 vcc, s1, v4 +; GFX900-GISEL-NEXT: v_cndmask_b32_e32 v1, 0, v1, vcc ; GFX900-GISEL-NEXT: v_ldexp_f32 v6, v6, v8 ; GFX900-GISEL-NEXT: v_mul_f32_e32 v8, s3, v2 ; GFX900-GISEL-NEXT: v_fma_f32 v2, s3, v2, -v8 @@ -1891,17 +1891,17 @@ define amdgpu_kernel void @s_exp10_v4f32(ptr addrspace(1) %out, <4 x float> %in) ; GFX900-GISEL-NEXT: v_add_f32_e32 v2, v8, v2 ; GFX900-GISEL-NEXT: v_cvt_i32_f32_e32 v3, v3 ; GFX900-GISEL-NEXT: v_exp_f32_e32 v8, v2 -; GFX900-GISEL-NEXT: v_cmp_gt_f32_e32 vcc, s1, v5 -; GFX900-GISEL-NEXT: v_cndmask_b32_e32 v1, v1, v7, vcc -; GFX900-GISEL-NEXT: v_cmp_lt_f32_e32 vcc, s2, v4 -; GFX900-GISEL-NEXT: v_cndmask_b32_e64 v2, v6, 0, vcc -; GFX900-GISEL-NEXT: v_cmp_gt_f32_e32 vcc, s2, v5 -; GFX900-GISEL-NEXT: v_cndmask_b32_e32 v2, v2, v7, vcc +; GFX900-GISEL-NEXT: v_cmp_le_f32_e32 vcc, s1, v5 +; GFX900-GISEL-NEXT: v_cndmask_b32_e32 v1, v7, v1, vcc +; GFX900-GISEL-NEXT: v_cmp_ge_f32_e32 vcc, s2, v4 +; GFX900-GISEL-NEXT: v_cndmask_b32_e32 v2, 0, v6, vcc +; GFX900-GISEL-NEXT: v_cmp_le_f32_e32 vcc, s2, v5 +; GFX900-GISEL-NEXT: v_cndmask_b32_e32 v2, v7, v2, vcc ; GFX900-GISEL-NEXT: v_ldexp_f32 v3, v8, v3 -; GFX900-GISEL-NEXT: v_cmp_lt_f32_e32 vcc, s3, v4 -; GFX900-GISEL-NEXT: v_cndmask_b32_e64 v3, v3, 0, vcc -; GFX900-GISEL-NEXT: v_cmp_gt_f32_e32 vcc, s3, v5 -; GFX900-GISEL-NEXT: v_cndmask_b32_e32 v3, v3, v7, vcc +; GFX900-GISEL-NEXT: v_cmp_ge_f32_e32 vcc, s3, v4 +; GFX900-GISEL-NEXT: v_cndmask_b32_e32 v3, 0, v3, vcc +; GFX900-GISEL-NEXT: v_cmp_le_f32_e32 vcc, s3, v5 +; GFX900-GISEL-NEXT: v_cndmask_b32_e32 v3, v7, v3, vcc ; GFX900-GISEL-NEXT: v_mov_b32_e32 v4, 0 ; GFX900-GISEL-NEXT: global_store_dwordx4 v4, v[0:3], s[6:7] ; GFX900-GISEL-NEXT: s_endpgm @@ -1991,7 +1991,7 @@ define amdgpu_kernel void @s_exp10_v4f32(ptr addrspace(1) %out, <4 x float> %in) ; SI-GISEL-NEXT: v_cvt_i32_f32_e32 v1, v4 ; SI-GISEL-NEXT: v_exp_f32_e32 v0, v0 ; SI-GISEL-NEXT: v_mov_b32_e32 v4, 0xc23369f4 -; SI-GISEL-NEXT: v_cmp_lt_f32_e32 vcc, s0, v4 +; SI-GISEL-NEXT: v_cmp_ge_f32_e32 vcc, s0, v4 ; SI-GISEL-NEXT: s_mov_b32 s6, -1 ; SI-GISEL-NEXT: v_ldexp_f32_e32 v0, v0, v1 ; SI-GISEL-NEXT: v_mul_f32_e32 v1, s1, v2 @@ -2002,9 +2002,9 @@ define amdgpu_kernel void @s_exp10_v4f32(ptr addrspace(1) %out, <4 x float> %in) ; SI-GISEL-NEXT: v_add_f32_e32 v1, v1, v6 ; SI-GISEL-NEXT: v_cvt_i32_f32_e32 v6, v7 ; SI-GISEL-NEXT: v_exp_f32_e32 v1, v1 -; SI-GISEL-NEXT: v_cndmask_b32_e64 v0, v0, 0, vcc +; SI-GISEL-NEXT: v_cndmask_b32_e32 v0, 0, v0, vcc ; SI-GISEL-NEXT: v_mov_b32_e32 v7, 0x7f800000 -; SI-GISEL-NEXT: v_cmp_gt_f32_e32 vcc, s0, v5 +; SI-GISEL-NEXT: v_cmp_le_f32_e32 vcc, s0, v5 ; SI-GISEL-NEXT: v_ldexp_f32_e32 v1, v1, v6 ; SI-GISEL-NEXT: v_mul_f32_e32 v6, s2, v2 ; SI-GISEL-NEXT: v_fma_f32 v8, s2, v2, 
-v6 @@ -2014,9 +2014,9 @@ define amdgpu_kernel void @s_exp10_v4f32(ptr addrspace(1) %out, <4 x float> %in) ; SI-GISEL-NEXT: v_add_f32_e32 v6, v6, v8 ; SI-GISEL-NEXT: v_cvt_i32_f32_e32 v8, v9 ; SI-GISEL-NEXT: v_exp_f32_e32 v6, v6 -; SI-GISEL-NEXT: v_cndmask_b32_e32 v0, v0, v7, vcc -; SI-GISEL-NEXT: v_cmp_lt_f32_e32 vcc, s1, v4 -; SI-GISEL-NEXT: v_cndmask_b32_e64 v1, v1, 0, vcc +; SI-GISEL-NEXT: v_cndmask_b32_e32 v0, v7, v0, vcc +; SI-GISEL-NEXT: v_cmp_ge_f32_e32 vcc, s1, v4 +; SI-GISEL-NEXT: v_cndmask_b32_e32 v1, 0, v1, vcc ; SI-GISEL-NEXT: v_ldexp_f32_e32 v6, v6, v8 ; SI-GISEL-NEXT: v_mul_f32_e32 v8, s3, v2 ; SI-GISEL-NEXT: v_fma_f32 v2, s3, v2, -v8 @@ -2026,17 +2026,17 @@ define amdgpu_kernel void @s_exp10_v4f32(ptr addrspace(1) %out, <4 x float> %in) ; SI-GISEL-NEXT: v_add_f32_e32 v2, v8, v2 ; SI-GISEL-NEXT: v_cvt_i32_f32_e32 v3, v3 ; SI-GISEL-NEXT: v_exp_f32_e32 v8, v2 -; SI-GISEL-NEXT: v_cmp_gt_f32_e32 vcc, s1, v5 -; SI-GISEL-NEXT: v_cndmask_b32_e32 v1, v1, v7, vcc -; SI-GISEL-NEXT: v_cmp_lt_f32_e32 vcc, s2, v4 -; SI-GISEL-NEXT: v_cndmask_b32_e64 v2, v6, 0, vcc -; SI-GISEL-NEXT: v_cmp_gt_f32_e32 vcc, s2, v5 -; SI-GISEL-NEXT: v_cndmask_b32_e32 v2, v2, v7, vcc +; SI-GISEL-NEXT: v_cmp_le_f32_e32 vcc, s1, v5 +; SI-GISEL-NEXT: v_cndmask_b32_e32 v1, v7, v1, vcc +; SI-GISEL-NEXT: v_cmp_ge_f32_e32 vcc, s2, v4 +; SI-GISEL-NEXT: v_cndmask_b32_e32 v2, 0, v6, vcc +; SI-GISEL-NEXT: v_cmp_le_f32_e32 vcc, s2, v5 +; SI-GISEL-NEXT: v_cndmask_b32_e32 v2, v7, v2, vcc ; SI-GISEL-NEXT: v_ldexp_f32_e32 v3, v8, v3 -; SI-GISEL-NEXT: v_cmp_lt_f32_e32 vcc, s3, v4 -; SI-GISEL-NEXT: v_cndmask_b32_e64 v3, v3, 0, vcc -; SI-GISEL-NEXT: v_cmp_gt_f32_e32 vcc, s3, v5 -; SI-GISEL-NEXT: v_cndmask_b32_e32 v3, v3, v7, vcc +; SI-GISEL-NEXT: v_cmp_ge_f32_e32 vcc, s3, v4 +; SI-GISEL-NEXT: v_cndmask_b32_e32 v3, 0, v3, vcc +; SI-GISEL-NEXT: v_cmp_le_f32_e32 vcc, s3, v5 +; SI-GISEL-NEXT: v_cndmask_b32_e32 v3, v7, v3, vcc ; SI-GISEL-NEXT: s_mov_b32 s7, 0xf000 ; SI-GISEL-NEXT: buffer_store_dwordx4 v[0:3], off, s[4:7], 0 ; SI-GISEL-NEXT: s_endpgm @@ -2567,11 +2567,11 @@ define float @v_exp10_f32(float %in) { ; VI-GISEL-NEXT: v_mov_b32_e32 v3, 0x7f800000 ; VI-GISEL-NEXT: v_ldexp_f32 v1, v1, v2 ; VI-GISEL-NEXT: v_mov_b32_e32 v2, 0xc23369f4 -; VI-GISEL-NEXT: v_cmp_lt_f32_e32 vcc, v0, v2 +; VI-GISEL-NEXT: v_cmp_ge_f32_e32 vcc, v0, v2 ; VI-GISEL-NEXT: v_mov_b32_e32 v2, 0x421a209b -; VI-GISEL-NEXT: v_cndmask_b32_e64 v1, v1, 0, vcc -; VI-GISEL-NEXT: v_cmp_gt_f32_e32 vcc, v0, v2 -; VI-GISEL-NEXT: v_cndmask_b32_e32 v0, v1, v3, vcc +; VI-GISEL-NEXT: v_cndmask_b32_e32 v1, 0, v1, vcc +; VI-GISEL-NEXT: v_cmp_le_f32_e32 vcc, v0, v2 +; VI-GISEL-NEXT: v_cndmask_b32_e32 v0, v3, v1, vcc ; VI-GISEL-NEXT: s_setpc_b64 s[30:31] ; ; GFX900-SDAG-LABEL: v_exp10_f32: @@ -2613,11 +2613,11 @@ define float @v_exp10_f32(float %in) { ; GFX900-GISEL-NEXT: v_mov_b32_e32 v3, 0x7f800000 ; GFX900-GISEL-NEXT: v_ldexp_f32 v1, v1, v2 ; GFX900-GISEL-NEXT: v_mov_b32_e32 v2, 0xc23369f4 -; GFX900-GISEL-NEXT: v_cmp_lt_f32_e32 vcc, v0, v2 +; GFX900-GISEL-NEXT: v_cmp_ge_f32_e32 vcc, v0, v2 ; GFX900-GISEL-NEXT: v_mov_b32_e32 v2, 0x421a209b -; GFX900-GISEL-NEXT: v_cndmask_b32_e64 v1, v1, 0, vcc -; GFX900-GISEL-NEXT: v_cmp_gt_f32_e32 vcc, v0, v2 -; GFX900-GISEL-NEXT: v_cndmask_b32_e32 v0, v1, v3, vcc +; GFX900-GISEL-NEXT: v_cndmask_b32_e32 v1, 0, v1, vcc +; GFX900-GISEL-NEXT: v_cmp_le_f32_e32 vcc, v0, v2 +; GFX900-GISEL-NEXT: v_cndmask_b32_e32 v0, v3, v1, vcc ; GFX900-GISEL-NEXT: s_setpc_b64 s[30:31] ; ; SI-SDAG-LABEL: v_exp10_f32: @@ -2659,11 +2659,11 @@ define float 
@v_exp10_f32(float %in) { ; SI-GISEL-NEXT: v_mov_b32_e32 v3, 0x7f800000 ; SI-GISEL-NEXT: v_ldexp_f32_e32 v1, v1, v2 ; SI-GISEL-NEXT: v_mov_b32_e32 v2, 0xc23369f4 -; SI-GISEL-NEXT: v_cmp_lt_f32_e32 vcc, v0, v2 +; SI-GISEL-NEXT: v_cmp_ge_f32_e32 vcc, v0, v2 ; SI-GISEL-NEXT: v_mov_b32_e32 v2, 0x421a209b -; SI-GISEL-NEXT: v_cndmask_b32_e64 v1, v1, 0, vcc -; SI-GISEL-NEXT: v_cmp_gt_f32_e32 vcc, v0, v2 -; SI-GISEL-NEXT: v_cndmask_b32_e32 v0, v1, v3, vcc +; SI-GISEL-NEXT: v_cndmask_b32_e32 v1, 0, v1, vcc +; SI-GISEL-NEXT: v_cmp_le_f32_e32 vcc, v0, v2 +; SI-GISEL-NEXT: v_cndmask_b32_e32 v0, v3, v1, vcc ; SI-GISEL-NEXT: s_setpc_b64 s[30:31] ; ; R600-LABEL: v_exp10_f32: @@ -2727,11 +2727,11 @@ define float @v_exp10_fabs_f32(float %in) { ; VI-GISEL-NEXT: v_mov_b32_e32 v3, 0x7f800000 ; VI-GISEL-NEXT: v_ldexp_f32 v1, v1, v2 ; VI-GISEL-NEXT: v_mov_b32_e32 v2, 0xc23369f4 -; VI-GISEL-NEXT: v_cmp_lt_f32_e64 s[4:5], |v0|, v2 +; VI-GISEL-NEXT: v_cmp_ge_f32_e64 vcc, |v0|, v2 ; VI-GISEL-NEXT: v_mov_b32_e32 v2, 0x421a209b -; VI-GISEL-NEXT: v_cndmask_b32_e64 v1, v1, 0, s[4:5] -; VI-GISEL-NEXT: v_cmp_gt_f32_e64 vcc, |v0|, v2 -; VI-GISEL-NEXT: v_cndmask_b32_e32 v0, v1, v3, vcc +; VI-GISEL-NEXT: v_cndmask_b32_e32 v1, 0, v1, vcc +; VI-GISEL-NEXT: v_cmp_le_f32_e64 vcc, |v0|, v2 +; VI-GISEL-NEXT: v_cndmask_b32_e32 v0, v3, v1, vcc ; VI-GISEL-NEXT: s_setpc_b64 s[30:31] ; ; GFX900-SDAG-LABEL: v_exp10_fabs_f32: @@ -2773,11 +2773,11 @@ define float @v_exp10_fabs_f32(float %in) { ; GFX900-GISEL-NEXT: v_mov_b32_e32 v3, 0x7f800000 ; GFX900-GISEL-NEXT: v_ldexp_f32 v1, v1, v2 ; GFX900-GISEL-NEXT: v_mov_b32_e32 v2, 0xc23369f4 -; GFX900-GISEL-NEXT: v_cmp_lt_f32_e64 s[4:5], |v0|, v2 +; GFX900-GISEL-NEXT: v_cmp_ge_f32_e64 vcc, |v0|, v2 ; GFX900-GISEL-NEXT: v_mov_b32_e32 v2, 0x421a209b -; GFX900-GISEL-NEXT: v_cndmask_b32_e64 v1, v1, 0, s[4:5] -; GFX900-GISEL-NEXT: v_cmp_gt_f32_e64 vcc, |v0|, v2 -; GFX900-GISEL-NEXT: v_cndmask_b32_e32 v0, v1, v3, vcc +; GFX900-GISEL-NEXT: v_cndmask_b32_e32 v1, 0, v1, vcc +; GFX900-GISEL-NEXT: v_cmp_le_f32_e64 vcc, |v0|, v2 +; GFX900-GISEL-NEXT: v_cndmask_b32_e32 v0, v3, v1, vcc ; GFX900-GISEL-NEXT: s_setpc_b64 s[30:31] ; ; SI-SDAG-LABEL: v_exp10_fabs_f32: @@ -2819,11 +2819,11 @@ define float @v_exp10_fabs_f32(float %in) { ; SI-GISEL-NEXT: v_mov_b32_e32 v3, 0x7f800000 ; SI-GISEL-NEXT: v_ldexp_f32_e32 v1, v1, v2 ; SI-GISEL-NEXT: v_mov_b32_e32 v2, 0xc23369f4 -; SI-GISEL-NEXT: v_cmp_lt_f32_e64 s[4:5], |v0|, v2 +; SI-GISEL-NEXT: v_cmp_ge_f32_e64 vcc, |v0|, v2 ; SI-GISEL-NEXT: v_mov_b32_e32 v2, 0x421a209b -; SI-GISEL-NEXT: v_cndmask_b32_e64 v1, v1, 0, s[4:5] -; SI-GISEL-NEXT: v_cmp_gt_f32_e64 vcc, |v0|, v2 -; SI-GISEL-NEXT: v_cndmask_b32_e32 v0, v1, v3, vcc +; SI-GISEL-NEXT: v_cndmask_b32_e32 v1, 0, v1, vcc +; SI-GISEL-NEXT: v_cmp_le_f32_e64 vcc, |v0|, v2 +; SI-GISEL-NEXT: v_cndmask_b32_e32 v0, v3, v1, vcc ; SI-GISEL-NEXT: s_setpc_b64 s[30:31] ; ; R600-LABEL: v_exp10_fabs_f32: @@ -2888,11 +2888,11 @@ define float @v_exp10_fneg_fabs_f32(float %in) { ; VI-GISEL-NEXT: v_mov_b32_e32 v3, 0x7f800000 ; VI-GISEL-NEXT: v_ldexp_f32 v1, v1, v2 ; VI-GISEL-NEXT: v_mov_b32_e32 v2, 0xc23369f4 -; VI-GISEL-NEXT: v_cmp_lt_f32_e64 s[4:5], -|v0|, v2 +; VI-GISEL-NEXT: v_cmp_ge_f32_e64 vcc, -|v0|, v2 ; VI-GISEL-NEXT: v_mov_b32_e32 v2, 0x421a209b -; VI-GISEL-NEXT: v_cndmask_b32_e64 v1, v1, 0, s[4:5] -; VI-GISEL-NEXT: v_cmp_gt_f32_e64 vcc, -|v0|, v2 -; VI-GISEL-NEXT: v_cndmask_b32_e32 v0, v1, v3, vcc +; VI-GISEL-NEXT: v_cndmask_b32_e32 v1, 0, v1, vcc +; VI-GISEL-NEXT: v_cmp_le_f32_e64 vcc, -|v0|, v2 +; VI-GISEL-NEXT: 
v_cndmask_b32_e32 v0, v3, v1, vcc ; VI-GISEL-NEXT: s_setpc_b64 s[30:31] ; ; GFX900-SDAG-LABEL: v_exp10_fneg_fabs_f32: @@ -2934,11 +2934,11 @@ define float @v_exp10_fneg_fabs_f32(float %in) { ; GFX900-GISEL-NEXT: v_mov_b32_e32 v3, 0x7f800000 ; GFX900-GISEL-NEXT: v_ldexp_f32 v1, v1, v2 ; GFX900-GISEL-NEXT: v_mov_b32_e32 v2, 0xc23369f4 -; GFX900-GISEL-NEXT: v_cmp_lt_f32_e64 s[4:5], -|v0|, v2 +; GFX900-GISEL-NEXT: v_cmp_ge_f32_e64 vcc, -|v0|, v2 ; GFX900-GISEL-NEXT: v_mov_b32_e32 v2, 0x421a209b -; GFX900-GISEL-NEXT: v_cndmask_b32_e64 v1, v1, 0, s[4:5] -; GFX900-GISEL-NEXT: v_cmp_gt_f32_e64 vcc, -|v0|, v2 -; GFX900-GISEL-NEXT: v_cndmask_b32_e32 v0, v1, v3, vcc +; GFX900-GISEL-NEXT: v_cndmask_b32_e32 v1, 0, v1, vcc +; GFX900-GISEL-NEXT: v_cmp_le_f32_e64 vcc, -|v0|, v2 +; GFX900-GISEL-NEXT: v_cndmask_b32_e32 v0, v3, v1, vcc ; GFX900-GISEL-NEXT: s_setpc_b64 s[30:31] ; ; SI-SDAG-LABEL: v_exp10_fneg_fabs_f32: @@ -2980,11 +2980,11 @@ define float @v_exp10_fneg_fabs_f32(float %in) { ; SI-GISEL-NEXT: v_mov_b32_e32 v3, 0x7f800000 ; SI-GISEL-NEXT: v_ldexp_f32_e32 v1, v1, v2 ; SI-GISEL-NEXT: v_mov_b32_e32 v2, 0xc23369f4 -; SI-GISEL-NEXT: v_cmp_lt_f32_e64 s[4:5], -|v0|, v2 +; SI-GISEL-NEXT: v_cmp_ge_f32_e64 vcc, -|v0|, v2 ; SI-GISEL-NEXT: v_mov_b32_e32 v2, 0x421a209b -; SI-GISEL-NEXT: v_cndmask_b32_e64 v1, v1, 0, s[4:5] -; SI-GISEL-NEXT: v_cmp_gt_f32_e64 vcc, -|v0|, v2 -; SI-GISEL-NEXT: v_cndmask_b32_e32 v0, v1, v3, vcc +; SI-GISEL-NEXT: v_cndmask_b32_e32 v1, 0, v1, vcc +; SI-GISEL-NEXT: v_cmp_le_f32_e64 vcc, -|v0|, v2 +; SI-GISEL-NEXT: v_cndmask_b32_e32 v0, v3, v1, vcc ; SI-GISEL-NEXT: s_setpc_b64 s[30:31] ; ; R600-LABEL: v_exp10_fneg_fabs_f32: @@ -3050,11 +3050,11 @@ define float @v_exp10_fneg_f32(float %in) { ; VI-GISEL-NEXT: v_mov_b32_e32 v3, 0x7f800000 ; VI-GISEL-NEXT: v_ldexp_f32 v1, v1, v2 ; VI-GISEL-NEXT: v_mov_b32_e32 v2, 0xc23369f4 -; VI-GISEL-NEXT: v_cmp_lt_f32_e64 s[4:5], -v0, v2 +; VI-GISEL-NEXT: v_cmp_ge_f32_e64 vcc, -v0, v2 ; VI-GISEL-NEXT: v_mov_b32_e32 v2, 0x421a209b -; VI-GISEL-NEXT: v_cndmask_b32_e64 v1, v1, 0, s[4:5] -; VI-GISEL-NEXT: v_cmp_gt_f32_e64 vcc, -v0, v2 -; VI-GISEL-NEXT: v_cndmask_b32_e32 v0, v1, v3, vcc +; VI-GISEL-NEXT: v_cndmask_b32_e32 v1, 0, v1, vcc +; VI-GISEL-NEXT: v_cmp_le_f32_e64 vcc, -v0, v2 +; VI-GISEL-NEXT: v_cndmask_b32_e32 v0, v3, v1, vcc ; VI-GISEL-NEXT: s_setpc_b64 s[30:31] ; ; GFX900-SDAG-LABEL: v_exp10_fneg_f32: @@ -3096,11 +3096,11 @@ define float @v_exp10_fneg_f32(float %in) { ; GFX900-GISEL-NEXT: v_mov_b32_e32 v3, 0x7f800000 ; GFX900-GISEL-NEXT: v_ldexp_f32 v1, v1, v2 ; GFX900-GISEL-NEXT: v_mov_b32_e32 v2, 0xc23369f4 -; GFX900-GISEL-NEXT: v_cmp_lt_f32_e64 s[4:5], -v0, v2 +; GFX900-GISEL-NEXT: v_cmp_ge_f32_e64 vcc, -v0, v2 ; GFX900-GISEL-NEXT: v_mov_b32_e32 v2, 0x421a209b -; GFX900-GISEL-NEXT: v_cndmask_b32_e64 v1, v1, 0, s[4:5] -; GFX900-GISEL-NEXT: v_cmp_gt_f32_e64 vcc, -v0, v2 -; GFX900-GISEL-NEXT: v_cndmask_b32_e32 v0, v1, v3, vcc +; GFX900-GISEL-NEXT: v_cndmask_b32_e32 v1, 0, v1, vcc +; GFX900-GISEL-NEXT: v_cmp_le_f32_e64 vcc, -v0, v2 +; GFX900-GISEL-NEXT: v_cndmask_b32_e32 v0, v3, v1, vcc ; GFX900-GISEL-NEXT: s_setpc_b64 s[30:31] ; ; SI-SDAG-LABEL: v_exp10_fneg_f32: @@ -3142,11 +3142,11 @@ define float @v_exp10_fneg_f32(float %in) { ; SI-GISEL-NEXT: v_mov_b32_e32 v3, 0x7f800000 ; SI-GISEL-NEXT: v_ldexp_f32_e32 v1, v1, v2 ; SI-GISEL-NEXT: v_mov_b32_e32 v2, 0xc23369f4 -; SI-GISEL-NEXT: v_cmp_lt_f32_e64 s[4:5], -v0, v2 +; SI-GISEL-NEXT: v_cmp_ge_f32_e64 vcc, -v0, v2 ; SI-GISEL-NEXT: v_mov_b32_e32 v2, 0x421a209b -; SI-GISEL-NEXT: v_cndmask_b32_e64 v1, 
v1, 0, s[4:5] -; SI-GISEL-NEXT: v_cmp_gt_f32_e64 vcc, -v0, v2 -; SI-GISEL-NEXT: v_cndmask_b32_e32 v0, v1, v3, vcc +; SI-GISEL-NEXT: v_cndmask_b32_e32 v1, 0, v1, vcc +; SI-GISEL-NEXT: v_cmp_le_f32_e64 vcc, -v0, v2 +; SI-GISEL-NEXT: v_cndmask_b32_e32 v0, v3, v1, vcc ; SI-GISEL-NEXT: s_setpc_b64 s[30:31] ; ; R600-LABEL: v_exp10_fneg_f32: @@ -3420,8 +3420,8 @@ define float @v_exp10_f32_ninf(float %in) { ; VI-GISEL-NEXT: v_exp_f32_e32 v1, v1 ; VI-GISEL-NEXT: v_ldexp_f32 v1, v1, v2 ; VI-GISEL-NEXT: v_mov_b32_e32 v2, 0xc23369f4 -; VI-GISEL-NEXT: v_cmp_lt_f32_e32 vcc, v0, v2 -; VI-GISEL-NEXT: v_cndmask_b32_e64 v0, v1, 0, vcc +; VI-GISEL-NEXT: v_cmp_ge_f32_e32 vcc, v0, v2 +; VI-GISEL-NEXT: v_cndmask_b32_e32 v0, 0, v1, vcc ; VI-GISEL-NEXT: s_setpc_b64 s[30:31] ; ; GFX900-SDAG-LABEL: v_exp10_f32_ninf: @@ -3458,8 +3458,8 @@ define float @v_exp10_f32_ninf(float %in) { ; GFX900-GISEL-NEXT: v_exp_f32_e32 v1, v1 ; GFX900-GISEL-NEXT: v_ldexp_f32 v1, v1, v2 ; GFX900-GISEL-NEXT: v_mov_b32_e32 v2, 0xc23369f4 -; GFX900-GISEL-NEXT: v_cmp_lt_f32_e32 vcc, v0, v2 -; GFX900-GISEL-NEXT: v_cndmask_b32_e64 v0, v1, 0, vcc +; GFX900-GISEL-NEXT: v_cmp_ge_f32_e32 vcc, v0, v2 +; GFX900-GISEL-NEXT: v_cndmask_b32_e32 v0, 0, v1, vcc ; GFX900-GISEL-NEXT: s_setpc_b64 s[30:31] ; ; SI-SDAG-LABEL: v_exp10_f32_ninf: @@ -3496,8 +3496,8 @@ define float @v_exp10_f32_ninf(float %in) { ; SI-GISEL-NEXT: v_exp_f32_e32 v1, v1 ; SI-GISEL-NEXT: v_ldexp_f32_e32 v1, v1, v2 ; SI-GISEL-NEXT: v_mov_b32_e32 v2, 0xc23369f4 -; SI-GISEL-NEXT: v_cmp_lt_f32_e32 vcc, v0, v2 -; SI-GISEL-NEXT: v_cndmask_b32_e64 v0, v1, 0, vcc +; SI-GISEL-NEXT: v_cmp_ge_f32_e32 vcc, v0, v2 +; SI-GISEL-NEXT: v_cndmask_b32_e32 v0, 0, v1, vcc ; SI-GISEL-NEXT: s_setpc_b64 s[30:31] ; ; R600-LABEL: v_exp10_f32_ninf: @@ -3828,11 +3828,11 @@ define float @v_exp10_f32_daz(float %in) #0 { ; VI-GISEL-NEXT: v_mov_b32_e32 v3, 0x7f800000 ; VI-GISEL-NEXT: v_ldexp_f32 v1, v1, v2 ; VI-GISEL-NEXT: v_mov_b32_e32 v2, 0xc23369f4 -; VI-GISEL-NEXT: v_cmp_lt_f32_e32 vcc, v0, v2 +; VI-GISEL-NEXT: v_cmp_ge_f32_e32 vcc, v0, v2 ; VI-GISEL-NEXT: v_mov_b32_e32 v2, 0x421a209b -; VI-GISEL-NEXT: v_cndmask_b32_e64 v1, v1, 0, vcc -; VI-GISEL-NEXT: v_cmp_gt_f32_e32 vcc, v0, v2 -; VI-GISEL-NEXT: v_cndmask_b32_e32 v0, v1, v3, vcc +; VI-GISEL-NEXT: v_cndmask_b32_e32 v1, 0, v1, vcc +; VI-GISEL-NEXT: v_cmp_le_f32_e32 vcc, v0, v2 +; VI-GISEL-NEXT: v_cndmask_b32_e32 v0, v3, v1, vcc ; VI-GISEL-NEXT: s_setpc_b64 s[30:31] ; ; GFX900-SDAG-LABEL: v_exp10_f32_daz: @@ -3874,11 +3874,11 @@ define float @v_exp10_f32_daz(float %in) #0 { ; GFX900-GISEL-NEXT: v_mov_b32_e32 v3, 0x7f800000 ; GFX900-GISEL-NEXT: v_ldexp_f32 v1, v1, v2 ; GFX900-GISEL-NEXT: v_mov_b32_e32 v2, 0xc23369f4 -; GFX900-GISEL-NEXT: v_cmp_lt_f32_e32 vcc, v0, v2 +; GFX900-GISEL-NEXT: v_cmp_ge_f32_e32 vcc, v0, v2 ; GFX900-GISEL-NEXT: v_mov_b32_e32 v2, 0x421a209b -; GFX900-GISEL-NEXT: v_cndmask_b32_e64 v1, v1, 0, vcc -; GFX900-GISEL-NEXT: v_cmp_gt_f32_e32 vcc, v0, v2 -; GFX900-GISEL-NEXT: v_cndmask_b32_e32 v0, v1, v3, vcc +; GFX900-GISEL-NEXT: v_cndmask_b32_e32 v1, 0, v1, vcc +; GFX900-GISEL-NEXT: v_cmp_le_f32_e32 vcc, v0, v2 +; GFX900-GISEL-NEXT: v_cndmask_b32_e32 v0, v3, v1, vcc ; GFX900-GISEL-NEXT: s_setpc_b64 s[30:31] ; ; SI-SDAG-LABEL: v_exp10_f32_daz: @@ -3920,11 +3920,11 @@ define float @v_exp10_f32_daz(float %in) #0 { ; SI-GISEL-NEXT: v_mov_b32_e32 v3, 0x7f800000 ; SI-GISEL-NEXT: v_ldexp_f32_e32 v1, v1, v2 ; SI-GISEL-NEXT: v_mov_b32_e32 v2, 0xc23369f4 -; SI-GISEL-NEXT: v_cmp_lt_f32_e32 vcc, v0, v2 +; SI-GISEL-NEXT: v_cmp_ge_f32_e32 vcc, v0, v2 ; 
SI-GISEL-NEXT: v_mov_b32_e32 v2, 0x421a209b -; SI-GISEL-NEXT: v_cndmask_b32_e64 v1, v1, 0, vcc -; SI-GISEL-NEXT: v_cmp_gt_f32_e32 vcc, v0, v2 -; SI-GISEL-NEXT: v_cndmask_b32_e32 v0, v1, v3, vcc +; SI-GISEL-NEXT: v_cndmask_b32_e32 v1, 0, v1, vcc +; SI-GISEL-NEXT: v_cmp_le_f32_e32 vcc, v0, v2 +; SI-GISEL-NEXT: v_cndmask_b32_e32 v0, v3, v1, vcc ; SI-GISEL-NEXT: s_setpc_b64 s[30:31] ; ; R600-LABEL: v_exp10_f32_daz: @@ -3986,11 +3986,11 @@ define float @v_exp10_f32_nnan(float %in) { ; VI-GISEL-NEXT: v_mov_b32_e32 v3, 0x7f800000 ; VI-GISEL-NEXT: v_ldexp_f32 v1, v1, v2 ; VI-GISEL-NEXT: v_mov_b32_e32 v2, 0xc23369f4 -; VI-GISEL-NEXT: v_cmp_lt_f32_e32 vcc, v0, v2 +; VI-GISEL-NEXT: v_cmp_ge_f32_e32 vcc, v0, v2 ; VI-GISEL-NEXT: v_mov_b32_e32 v2, 0x421a209b -; VI-GISEL-NEXT: v_cndmask_b32_e64 v1, v1, 0, vcc -; VI-GISEL-NEXT: v_cmp_gt_f32_e32 vcc, v0, v2 -; VI-GISEL-NEXT: v_cndmask_b32_e32 v0, v1, v3, vcc +; VI-GISEL-NEXT: v_cndmask_b32_e32 v1, 0, v1, vcc +; VI-GISEL-NEXT: v_cmp_le_f32_e32 vcc, v0, v2 +; VI-GISEL-NEXT: v_cndmask_b32_e32 v0, v3, v1, vcc ; VI-GISEL-NEXT: s_setpc_b64 s[30:31] ; ; GFX900-SDAG-LABEL: v_exp10_f32_nnan: @@ -4032,11 +4032,11 @@ define float @v_exp10_f32_nnan(float %in) { ; GFX900-GISEL-NEXT: v_mov_b32_e32 v3, 0x7f800000 ; GFX900-GISEL-NEXT: v_ldexp_f32 v1, v1, v2 ; GFX900-GISEL-NEXT: v_mov_b32_e32 v2, 0xc23369f4 -; GFX900-GISEL-NEXT: v_cmp_lt_f32_e32 vcc, v0, v2 +; GFX900-GISEL-NEXT: v_cmp_ge_f32_e32 vcc, v0, v2 ; GFX900-GISEL-NEXT: v_mov_b32_e32 v2, 0x421a209b -; GFX900-GISEL-NEXT: v_cndmask_b32_e64 v1, v1, 0, vcc -; GFX900-GISEL-NEXT: v_cmp_gt_f32_e32 vcc, v0, v2 -; GFX900-GISEL-NEXT: v_cndmask_b32_e32 v0, v1, v3, vcc +; GFX900-GISEL-NEXT: v_cndmask_b32_e32 v1, 0, v1, vcc +; GFX900-GISEL-NEXT: v_cmp_le_f32_e32 vcc, v0, v2 +; GFX900-GISEL-NEXT: v_cndmask_b32_e32 v0, v3, v1, vcc ; GFX900-GISEL-NEXT: s_setpc_b64 s[30:31] ; ; SI-SDAG-LABEL: v_exp10_f32_nnan: @@ -4078,11 +4078,11 @@ define float @v_exp10_f32_nnan(float %in) { ; SI-GISEL-NEXT: v_mov_b32_e32 v3, 0x7f800000 ; SI-GISEL-NEXT: v_ldexp_f32_e32 v1, v1, v2 ; SI-GISEL-NEXT: v_mov_b32_e32 v2, 0xc23369f4 -; SI-GISEL-NEXT: v_cmp_lt_f32_e32 vcc, v0, v2 +; SI-GISEL-NEXT: v_cmp_ge_f32_e32 vcc, v0, v2 ; SI-GISEL-NEXT: v_mov_b32_e32 v2, 0x421a209b -; SI-GISEL-NEXT: v_cndmask_b32_e64 v1, v1, 0, vcc -; SI-GISEL-NEXT: v_cmp_gt_f32_e32 vcc, v0, v2 -; SI-GISEL-NEXT: v_cndmask_b32_e32 v0, v1, v3, vcc +; SI-GISEL-NEXT: v_cndmask_b32_e32 v1, 0, v1, vcc +; SI-GISEL-NEXT: v_cmp_le_f32_e32 vcc, v0, v2 +; SI-GISEL-NEXT: v_cndmask_b32_e32 v0, v3, v1, vcc ; SI-GISEL-NEXT: s_setpc_b64 s[30:31] ; ; R600-LABEL: v_exp10_f32_nnan: @@ -4144,11 +4144,11 @@ define float @v_exp10_f32_nnan_daz(float %in) #0 { ; VI-GISEL-NEXT: v_mov_b32_e32 v3, 0x7f800000 ; VI-GISEL-NEXT: v_ldexp_f32 v1, v1, v2 ; VI-GISEL-NEXT: v_mov_b32_e32 v2, 0xc23369f4 -; VI-GISEL-NEXT: v_cmp_lt_f32_e32 vcc, v0, v2 +; VI-GISEL-NEXT: v_cmp_ge_f32_e32 vcc, v0, v2 ; VI-GISEL-NEXT: v_mov_b32_e32 v2, 0x421a209b -; VI-GISEL-NEXT: v_cndmask_b32_e64 v1, v1, 0, vcc -; VI-GISEL-NEXT: v_cmp_gt_f32_e32 vcc, v0, v2 -; VI-GISEL-NEXT: v_cndmask_b32_e32 v0, v1, v3, vcc +; VI-GISEL-NEXT: v_cndmask_b32_e32 v1, 0, v1, vcc +; VI-GISEL-NEXT: v_cmp_le_f32_e32 vcc, v0, v2 +; VI-GISEL-NEXT: v_cndmask_b32_e32 v0, v3, v1, vcc ; VI-GISEL-NEXT: s_setpc_b64 s[30:31] ; ; GFX900-SDAG-LABEL: v_exp10_f32_nnan_daz: @@ -4190,11 +4190,11 @@ define float @v_exp10_f32_nnan_daz(float %in) #0 { ; GFX900-GISEL-NEXT: v_mov_b32_e32 v3, 0x7f800000 ; GFX900-GISEL-NEXT: v_ldexp_f32 v1, v1, v2 ; GFX900-GISEL-NEXT: v_mov_b32_e32 v2, 
0xc23369f4 -; GFX900-GISEL-NEXT: v_cmp_lt_f32_e32 vcc, v0, v2 +; GFX900-GISEL-NEXT: v_cmp_ge_f32_e32 vcc, v0, v2 ; GFX900-GISEL-NEXT: v_mov_b32_e32 v2, 0x421a209b -; GFX900-GISEL-NEXT: v_cndmask_b32_e64 v1, v1, 0, vcc -; GFX900-GISEL-NEXT: v_cmp_gt_f32_e32 vcc, v0, v2 -; GFX900-GISEL-NEXT: v_cndmask_b32_e32 v0, v1, v3, vcc +; GFX900-GISEL-NEXT: v_cndmask_b32_e32 v1, 0, v1, vcc +; GFX900-GISEL-NEXT: v_cmp_le_f32_e32 vcc, v0, v2 +; GFX900-GISEL-NEXT: v_cndmask_b32_e32 v0, v3, v1, vcc ; GFX900-GISEL-NEXT: s_setpc_b64 s[30:31] ; ; SI-SDAG-LABEL: v_exp10_f32_nnan_daz: @@ -4236,11 +4236,11 @@ define float @v_exp10_f32_nnan_daz(float %in) #0 { ; SI-GISEL-NEXT: v_mov_b32_e32 v3, 0x7f800000 ; SI-GISEL-NEXT: v_ldexp_f32_e32 v1, v1, v2 ; SI-GISEL-NEXT: v_mov_b32_e32 v2, 0xc23369f4 -; SI-GISEL-NEXT: v_cmp_lt_f32_e32 vcc, v0, v2 +; SI-GISEL-NEXT: v_cmp_ge_f32_e32 vcc, v0, v2 ; SI-GISEL-NEXT: v_mov_b32_e32 v2, 0x421a209b -; SI-GISEL-NEXT: v_cndmask_b32_e64 v1, v1, 0, vcc -; SI-GISEL-NEXT: v_cmp_gt_f32_e32 vcc, v0, v2 -; SI-GISEL-NEXT: v_cndmask_b32_e32 v0, v1, v3, vcc +; SI-GISEL-NEXT: v_cndmask_b32_e32 v1, 0, v1, vcc +; SI-GISEL-NEXT: v_cmp_le_f32_e32 vcc, v0, v2 +; SI-GISEL-NEXT: v_cndmask_b32_e32 v0, v3, v1, vcc ; SI-GISEL-NEXT: s_setpc_b64 s[30:31] ; ; R600-LABEL: v_exp10_f32_nnan_daz: @@ -4302,11 +4302,11 @@ define float @v_exp10_f32_nnan_dynamic(float %in) #1 { ; VI-GISEL-NEXT: v_mov_b32_e32 v3, 0x7f800000 ; VI-GISEL-NEXT: v_ldexp_f32 v1, v1, v2 ; VI-GISEL-NEXT: v_mov_b32_e32 v2, 0xc23369f4 -; VI-GISEL-NEXT: v_cmp_lt_f32_e32 vcc, v0, v2 +; VI-GISEL-NEXT: v_cmp_ge_f32_e32 vcc, v0, v2 ; VI-GISEL-NEXT: v_mov_b32_e32 v2, 0x421a209b -; VI-GISEL-NEXT: v_cndmask_b32_e64 v1, v1, 0, vcc -; VI-GISEL-NEXT: v_cmp_gt_f32_e32 vcc, v0, v2 -; VI-GISEL-NEXT: v_cndmask_b32_e32 v0, v1, v3, vcc +; VI-GISEL-NEXT: v_cndmask_b32_e32 v1, 0, v1, vcc +; VI-GISEL-NEXT: v_cmp_le_f32_e32 vcc, v0, v2 +; VI-GISEL-NEXT: v_cndmask_b32_e32 v0, v3, v1, vcc ; VI-GISEL-NEXT: s_setpc_b64 s[30:31] ; ; GFX900-SDAG-LABEL: v_exp10_f32_nnan_dynamic: @@ -4348,11 +4348,11 @@ define float @v_exp10_f32_nnan_dynamic(float %in) #1 { ; GFX900-GISEL-NEXT: v_mov_b32_e32 v3, 0x7f800000 ; GFX900-GISEL-NEXT: v_ldexp_f32 v1, v1, v2 ; GFX900-GISEL-NEXT: v_mov_b32_e32 v2, 0xc23369f4 -; GFX900-GISEL-NEXT: v_cmp_lt_f32_e32 vcc, v0, v2 +; GFX900-GISEL-NEXT: v_cmp_ge_f32_e32 vcc, v0, v2 ; GFX900-GISEL-NEXT: v_mov_b32_e32 v2, 0x421a209b -; GFX900-GISEL-NEXT: v_cndmask_b32_e64 v1, v1, 0, vcc -; GFX900-GISEL-NEXT: v_cmp_gt_f32_e32 vcc, v0, v2 -; GFX900-GISEL-NEXT: v_cndmask_b32_e32 v0, v1, v3, vcc +; GFX900-GISEL-NEXT: v_cndmask_b32_e32 v1, 0, v1, vcc +; GFX900-GISEL-NEXT: v_cmp_le_f32_e32 vcc, v0, v2 +; GFX900-GISEL-NEXT: v_cndmask_b32_e32 v0, v3, v1, vcc ; GFX900-GISEL-NEXT: s_setpc_b64 s[30:31] ; ; SI-SDAG-LABEL: v_exp10_f32_nnan_dynamic: @@ -4394,11 +4394,11 @@ define float @v_exp10_f32_nnan_dynamic(float %in) #1 { ; SI-GISEL-NEXT: v_mov_b32_e32 v3, 0x7f800000 ; SI-GISEL-NEXT: v_ldexp_f32_e32 v1, v1, v2 ; SI-GISEL-NEXT: v_mov_b32_e32 v2, 0xc23369f4 -; SI-GISEL-NEXT: v_cmp_lt_f32_e32 vcc, v0, v2 +; SI-GISEL-NEXT: v_cmp_ge_f32_e32 vcc, v0, v2 ; SI-GISEL-NEXT: v_mov_b32_e32 v2, 0x421a209b -; SI-GISEL-NEXT: v_cndmask_b32_e64 v1, v1, 0, vcc -; SI-GISEL-NEXT: v_cmp_gt_f32_e32 vcc, v0, v2 -; SI-GISEL-NEXT: v_cndmask_b32_e32 v0, v1, v3, vcc +; SI-GISEL-NEXT: v_cndmask_b32_e32 v1, 0, v1, vcc +; SI-GISEL-NEXT: v_cmp_le_f32_e32 vcc, v0, v2 +; SI-GISEL-NEXT: v_cndmask_b32_e32 v0, v3, v1, vcc ; SI-GISEL-NEXT: s_setpc_b64 s[30:31] ; ; R600-LABEL: 
v_exp10_f32_nnan_dynamic: @@ -4455,8 +4455,8 @@ define float @v_exp10_f32_ninf_daz(float %in) #0 { ; VI-GISEL-NEXT: v_exp_f32_e32 v1, v1 ; VI-GISEL-NEXT: v_ldexp_f32 v1, v1, v2 ; VI-GISEL-NEXT: v_mov_b32_e32 v2, 0xc23369f4 -; VI-GISEL-NEXT: v_cmp_lt_f32_e32 vcc, v0, v2 -; VI-GISEL-NEXT: v_cndmask_b32_e64 v0, v1, 0, vcc +; VI-GISEL-NEXT: v_cmp_ge_f32_e32 vcc, v0, v2 +; VI-GISEL-NEXT: v_cndmask_b32_e32 v0, 0, v1, vcc ; VI-GISEL-NEXT: s_setpc_b64 s[30:31] ; ; GFX900-SDAG-LABEL: v_exp10_f32_ninf_daz: @@ -4493,8 +4493,8 @@ define float @v_exp10_f32_ninf_daz(float %in) #0 { ; GFX900-GISEL-NEXT: v_exp_f32_e32 v1, v1 ; GFX900-GISEL-NEXT: v_ldexp_f32 v1, v1, v2 ; GFX900-GISEL-NEXT: v_mov_b32_e32 v2, 0xc23369f4 -; GFX900-GISEL-NEXT: v_cmp_lt_f32_e32 vcc, v0, v2 -; GFX900-GISEL-NEXT: v_cndmask_b32_e64 v0, v1, 0, vcc +; GFX900-GISEL-NEXT: v_cmp_ge_f32_e32 vcc, v0, v2 +; GFX900-GISEL-NEXT: v_cndmask_b32_e32 v0, 0, v1, vcc ; GFX900-GISEL-NEXT: s_setpc_b64 s[30:31] ; ; SI-SDAG-LABEL: v_exp10_f32_ninf_daz: @@ -4531,8 +4531,8 @@ define float @v_exp10_f32_ninf_daz(float %in) #0 { ; SI-GISEL-NEXT: v_exp_f32_e32 v1, v1 ; SI-GISEL-NEXT: v_ldexp_f32_e32 v1, v1, v2 ; SI-GISEL-NEXT: v_mov_b32_e32 v2, 0xc23369f4 -; SI-GISEL-NEXT: v_cmp_lt_f32_e32 vcc, v0, v2 -; SI-GISEL-NEXT: v_cndmask_b32_e64 v0, v1, 0, vcc +; SI-GISEL-NEXT: v_cmp_ge_f32_e32 vcc, v0, v2 +; SI-GISEL-NEXT: v_cndmask_b32_e32 v0, 0, v1, vcc ; SI-GISEL-NEXT: s_setpc_b64 s[30:31] ; ; R600-LABEL: v_exp10_f32_ninf_daz: @@ -4589,8 +4589,8 @@ define float @v_exp10_f32_ninf_dynamic(float %in) #1 { ; VI-GISEL-NEXT: v_exp_f32_e32 v1, v1 ; VI-GISEL-NEXT: v_ldexp_f32 v1, v1, v2 ; VI-GISEL-NEXT: v_mov_b32_e32 v2, 0xc23369f4 -; VI-GISEL-NEXT: v_cmp_lt_f32_e32 vcc, v0, v2 -; VI-GISEL-NEXT: v_cndmask_b32_e64 v0, v1, 0, vcc +; VI-GISEL-NEXT: v_cmp_ge_f32_e32 vcc, v0, v2 +; VI-GISEL-NEXT: v_cndmask_b32_e32 v0, 0, v1, vcc ; VI-GISEL-NEXT: s_setpc_b64 s[30:31] ; ; GFX900-SDAG-LABEL: v_exp10_f32_ninf_dynamic: @@ -4627,8 +4627,8 @@ define float @v_exp10_f32_ninf_dynamic(float %in) #1 { ; GFX900-GISEL-NEXT: v_exp_f32_e32 v1, v1 ; GFX900-GISEL-NEXT: v_ldexp_f32 v1, v1, v2 ; GFX900-GISEL-NEXT: v_mov_b32_e32 v2, 0xc23369f4 -; GFX900-GISEL-NEXT: v_cmp_lt_f32_e32 vcc, v0, v2 -; GFX900-GISEL-NEXT: v_cndmask_b32_e64 v0, v1, 0, vcc +; GFX900-GISEL-NEXT: v_cmp_ge_f32_e32 vcc, v0, v2 +; GFX900-GISEL-NEXT: v_cndmask_b32_e32 v0, 0, v1, vcc ; GFX900-GISEL-NEXT: s_setpc_b64 s[30:31] ; ; SI-SDAG-LABEL: v_exp10_f32_ninf_dynamic: @@ -4665,8 +4665,8 @@ define float @v_exp10_f32_ninf_dynamic(float %in) #1 { ; SI-GISEL-NEXT: v_exp_f32_e32 v1, v1 ; SI-GISEL-NEXT: v_ldexp_f32_e32 v1, v1, v2 ; SI-GISEL-NEXT: v_mov_b32_e32 v2, 0xc23369f4 -; SI-GISEL-NEXT: v_cmp_lt_f32_e32 vcc, v0, v2 -; SI-GISEL-NEXT: v_cndmask_b32_e64 v0, v1, 0, vcc +; SI-GISEL-NEXT: v_cmp_ge_f32_e32 vcc, v0, v2 +; SI-GISEL-NEXT: v_cndmask_b32_e32 v0, 0, v1, vcc ; SI-GISEL-NEXT: s_setpc_b64 s[30:31] ; ; R600-LABEL: v_exp10_f32_ninf_dynamic: @@ -4723,8 +4723,8 @@ define float @v_exp10_f32_nnan_ninf(float %in) { ; VI-GISEL-NEXT: v_exp_f32_e32 v1, v1 ; VI-GISEL-NEXT: v_ldexp_f32 v1, v1, v2 ; VI-GISEL-NEXT: v_mov_b32_e32 v2, 0xc23369f4 -; VI-GISEL-NEXT: v_cmp_lt_f32_e32 vcc, v0, v2 -; VI-GISEL-NEXT: v_cndmask_b32_e64 v0, v1, 0, vcc +; VI-GISEL-NEXT: v_cmp_ge_f32_e32 vcc, v0, v2 +; VI-GISEL-NEXT: v_cndmask_b32_e32 v0, 0, v1, vcc ; VI-GISEL-NEXT: s_setpc_b64 s[30:31] ; ; GFX900-SDAG-LABEL: v_exp10_f32_nnan_ninf: @@ -4761,8 +4761,8 @@ define float @v_exp10_f32_nnan_ninf(float %in) { ; GFX900-GISEL-NEXT: v_exp_f32_e32 v1, v1 ; 
GFX900-GISEL-NEXT: v_ldexp_f32 v1, v1, v2 ; GFX900-GISEL-NEXT: v_mov_b32_e32 v2, 0xc23369f4 -; GFX900-GISEL-NEXT: v_cmp_lt_f32_e32 vcc, v0, v2 -; GFX900-GISEL-NEXT: v_cndmask_b32_e64 v0, v1, 0, vcc +; GFX900-GISEL-NEXT: v_cmp_ge_f32_e32 vcc, v0, v2 +; GFX900-GISEL-NEXT: v_cndmask_b32_e32 v0, 0, v1, vcc ; GFX900-GISEL-NEXT: s_setpc_b64 s[30:31] ; ; SI-SDAG-LABEL: v_exp10_f32_nnan_ninf: @@ -4799,8 +4799,8 @@ define float @v_exp10_f32_nnan_ninf(float %in) { ; SI-GISEL-NEXT: v_exp_f32_e32 v1, v1 ; SI-GISEL-NEXT: v_ldexp_f32_e32 v1, v1, v2 ; SI-GISEL-NEXT: v_mov_b32_e32 v2, 0xc23369f4 -; SI-GISEL-NEXT: v_cmp_lt_f32_e32 vcc, v0, v2 -; SI-GISEL-NEXT: v_cndmask_b32_e64 v0, v1, 0, vcc +; SI-GISEL-NEXT: v_cmp_ge_f32_e32 vcc, v0, v2 +; SI-GISEL-NEXT: v_cndmask_b32_e32 v0, 0, v1, vcc ; SI-GISEL-NEXT: s_setpc_b64 s[30:31] ; ; R600-LABEL: v_exp10_f32_nnan_ninf: @@ -4857,8 +4857,8 @@ define float @v_exp10_f32_nnan_ninf_daz(float %in) #0 { ; VI-GISEL-NEXT: v_exp_f32_e32 v1, v1 ; VI-GISEL-NEXT: v_ldexp_f32 v1, v1, v2 ; VI-GISEL-NEXT: v_mov_b32_e32 v2, 0xc23369f4 -; VI-GISEL-NEXT: v_cmp_lt_f32_e32 vcc, v0, v2 -; VI-GISEL-NEXT: v_cndmask_b32_e64 v0, v1, 0, vcc +; VI-GISEL-NEXT: v_cmp_ge_f32_e32 vcc, v0, v2 +; VI-GISEL-NEXT: v_cndmask_b32_e32 v0, 0, v1, vcc ; VI-GISEL-NEXT: s_setpc_b64 s[30:31] ; ; GFX900-SDAG-LABEL: v_exp10_f32_nnan_ninf_daz: @@ -4895,8 +4895,8 @@ define float @v_exp10_f32_nnan_ninf_daz(float %in) #0 { ; GFX900-GISEL-NEXT: v_exp_f32_e32 v1, v1 ; GFX900-GISEL-NEXT: v_ldexp_f32 v1, v1, v2 ; GFX900-GISEL-NEXT: v_mov_b32_e32 v2, 0xc23369f4 -; GFX900-GISEL-NEXT: v_cmp_lt_f32_e32 vcc, v0, v2 -; GFX900-GISEL-NEXT: v_cndmask_b32_e64 v0, v1, 0, vcc +; GFX900-GISEL-NEXT: v_cmp_ge_f32_e32 vcc, v0, v2 +; GFX900-GISEL-NEXT: v_cndmask_b32_e32 v0, 0, v1, vcc ; GFX900-GISEL-NEXT: s_setpc_b64 s[30:31] ; ; SI-SDAG-LABEL: v_exp10_f32_nnan_ninf_daz: @@ -4933,8 +4933,8 @@ define float @v_exp10_f32_nnan_ninf_daz(float %in) #0 { ; SI-GISEL-NEXT: v_exp_f32_e32 v1, v1 ; SI-GISEL-NEXT: v_ldexp_f32_e32 v1, v1, v2 ; SI-GISEL-NEXT: v_mov_b32_e32 v2, 0xc23369f4 -; SI-GISEL-NEXT: v_cmp_lt_f32_e32 vcc, v0, v2 -; SI-GISEL-NEXT: v_cndmask_b32_e64 v0, v1, 0, vcc +; SI-GISEL-NEXT: v_cmp_ge_f32_e32 vcc, v0, v2 +; SI-GISEL-NEXT: v_cndmask_b32_e32 v0, 0, v1, vcc ; SI-GISEL-NEXT: s_setpc_b64 s[30:31] ; ; R600-LABEL: v_exp10_f32_nnan_ninf_daz: @@ -4991,8 +4991,8 @@ define float @v_exp10_f32_nnan_ninf_dynamic(float %in) #1 { ; VI-GISEL-NEXT: v_exp_f32_e32 v1, v1 ; VI-GISEL-NEXT: v_ldexp_f32 v1, v1, v2 ; VI-GISEL-NEXT: v_mov_b32_e32 v2, 0xc23369f4 -; VI-GISEL-NEXT: v_cmp_lt_f32_e32 vcc, v0, v2 -; VI-GISEL-NEXT: v_cndmask_b32_e64 v0, v1, 0, vcc +; VI-GISEL-NEXT: v_cmp_ge_f32_e32 vcc, v0, v2 +; VI-GISEL-NEXT: v_cndmask_b32_e32 v0, 0, v1, vcc ; VI-GISEL-NEXT: s_setpc_b64 s[30:31] ; ; GFX900-SDAG-LABEL: v_exp10_f32_nnan_ninf_dynamic: @@ -5029,8 +5029,8 @@ define float @v_exp10_f32_nnan_ninf_dynamic(float %in) #1 { ; GFX900-GISEL-NEXT: v_exp_f32_e32 v1, v1 ; GFX900-GISEL-NEXT: v_ldexp_f32 v1, v1, v2 ; GFX900-GISEL-NEXT: v_mov_b32_e32 v2, 0xc23369f4 -; GFX900-GISEL-NEXT: v_cmp_lt_f32_e32 vcc, v0, v2 -; GFX900-GISEL-NEXT: v_cndmask_b32_e64 v0, v1, 0, vcc +; GFX900-GISEL-NEXT: v_cmp_ge_f32_e32 vcc, v0, v2 +; GFX900-GISEL-NEXT: v_cndmask_b32_e32 v0, 0, v1, vcc ; GFX900-GISEL-NEXT: s_setpc_b64 s[30:31] ; ; SI-SDAG-LABEL: v_exp10_f32_nnan_ninf_dynamic: @@ -5067,8 +5067,8 @@ define float @v_exp10_f32_nnan_ninf_dynamic(float %in) #1 { ; SI-GISEL-NEXT: v_exp_f32_e32 v1, v1 ; SI-GISEL-NEXT: v_ldexp_f32_e32 v1, v1, v2 ; SI-GISEL-NEXT: v_mov_b32_e32 
v2, 0xc23369f4 -; SI-GISEL-NEXT: v_cmp_lt_f32_e32 vcc, v0, v2 -; SI-GISEL-NEXT: v_cndmask_b32_e64 v0, v1, 0, vcc +; SI-GISEL-NEXT: v_cmp_ge_f32_e32 vcc, v0, v2 +; SI-GISEL-NEXT: v_cndmask_b32_e32 v0, 0, v1, vcc ; SI-GISEL-NEXT: s_setpc_b64 s[30:31] ; ; R600-LABEL: v_exp10_f32_nnan_ninf_dynamic: @@ -5178,11 +5178,11 @@ define float @v_exp10_f32_dynamic_mode(float %in) #1 { ; VI-GISEL-NEXT: v_mov_b32_e32 v3, 0x7f800000 ; VI-GISEL-NEXT: v_ldexp_f32 v1, v1, v2 ; VI-GISEL-NEXT: v_mov_b32_e32 v2, 0xc23369f4 -; VI-GISEL-NEXT: v_cmp_lt_f32_e32 vcc, v0, v2 +; VI-GISEL-NEXT: v_cmp_ge_f32_e32 vcc, v0, v2 ; VI-GISEL-NEXT: v_mov_b32_e32 v2, 0x421a209b -; VI-GISEL-NEXT: v_cndmask_b32_e64 v1, v1, 0, vcc -; VI-GISEL-NEXT: v_cmp_gt_f32_e32 vcc, v0, v2 -; VI-GISEL-NEXT: v_cndmask_b32_e32 v0, v1, v3, vcc +; VI-GISEL-NEXT: v_cndmask_b32_e32 v1, 0, v1, vcc +; VI-GISEL-NEXT: v_cmp_le_f32_e32 vcc, v0, v2 +; VI-GISEL-NEXT: v_cndmask_b32_e32 v0, v3, v1, vcc ; VI-GISEL-NEXT: s_setpc_b64 s[30:31] ; ; GFX900-SDAG-LABEL: v_exp10_f32_dynamic_mode: @@ -5224,11 +5224,11 @@ define float @v_exp10_f32_dynamic_mode(float %in) #1 { ; GFX900-GISEL-NEXT: v_mov_b32_e32 v3, 0x7f800000 ; GFX900-GISEL-NEXT: v_ldexp_f32 v1, v1, v2 ; GFX900-GISEL-NEXT: v_mov_b32_e32 v2, 0xc23369f4 -; GFX900-GISEL-NEXT: v_cmp_lt_f32_e32 vcc, v0, v2 +; GFX900-GISEL-NEXT: v_cmp_ge_f32_e32 vcc, v0, v2 ; GFX900-GISEL-NEXT: v_mov_b32_e32 v2, 0x421a209b -; GFX900-GISEL-NEXT: v_cndmask_b32_e64 v1, v1, 0, vcc -; GFX900-GISEL-NEXT: v_cmp_gt_f32_e32 vcc, v0, v2 -; GFX900-GISEL-NEXT: v_cndmask_b32_e32 v0, v1, v3, vcc +; GFX900-GISEL-NEXT: v_cndmask_b32_e32 v1, 0, v1, vcc +; GFX900-GISEL-NEXT: v_cmp_le_f32_e32 vcc, v0, v2 +; GFX900-GISEL-NEXT: v_cndmask_b32_e32 v0, v3, v1, vcc ; GFX900-GISEL-NEXT: s_setpc_b64 s[30:31] ; ; SI-SDAG-LABEL: v_exp10_f32_dynamic_mode: @@ -5270,11 +5270,11 @@ define float @v_exp10_f32_dynamic_mode(float %in) #1 { ; SI-GISEL-NEXT: v_mov_b32_e32 v3, 0x7f800000 ; SI-GISEL-NEXT: v_ldexp_f32_e32 v1, v1, v2 ; SI-GISEL-NEXT: v_mov_b32_e32 v2, 0xc23369f4 -; SI-GISEL-NEXT: v_cmp_lt_f32_e32 vcc, v0, v2 +; SI-GISEL-NEXT: v_cmp_ge_f32_e32 vcc, v0, v2 ; SI-GISEL-NEXT: v_mov_b32_e32 v2, 0x421a209b -; SI-GISEL-NEXT: v_cndmask_b32_e64 v1, v1, 0, vcc -; SI-GISEL-NEXT: v_cmp_gt_f32_e32 vcc, v0, v2 -; SI-GISEL-NEXT: v_cndmask_b32_e32 v0, v1, v3, vcc +; SI-GISEL-NEXT: v_cndmask_b32_e32 v1, 0, v1, vcc +; SI-GISEL-NEXT: v_cmp_le_f32_e32 vcc, v0, v2 +; SI-GISEL-NEXT: v_cndmask_b32_e32 v0, v3, v1, vcc ; SI-GISEL-NEXT: s_setpc_b64 s[30:31] ; ; R600-LABEL: v_exp10_f32_dynamic_mode: @@ -5322,11 +5322,11 @@ define float @v_exp10_f32_undef() { ; VI-GISEL-NEXT: v_mov_b32_e32 v2, 0x7f800000 ; VI-GISEL-NEXT: v_ldexp_f32 v0, v0, v1 ; VI-GISEL-NEXT: v_mov_b32_e32 v1, 0xc23369f4 -; VI-GISEL-NEXT: v_cmp_lt_f32_e32 vcc, s4, v1 +; VI-GISEL-NEXT: v_cmp_ge_f32_e32 vcc, s4, v1 ; VI-GISEL-NEXT: v_mov_b32_e32 v1, 0x421a209b -; VI-GISEL-NEXT: v_cndmask_b32_e64 v0, v0, 0, vcc -; VI-GISEL-NEXT: v_cmp_gt_f32_e32 vcc, s4, v1 -; VI-GISEL-NEXT: v_cndmask_b32_e32 v0, v0, v2, vcc +; VI-GISEL-NEXT: v_cndmask_b32_e32 v0, 0, v0, vcc +; VI-GISEL-NEXT: v_cmp_le_f32_e32 vcc, s4, v1 +; VI-GISEL-NEXT: v_cndmask_b32_e32 v0, v2, v0, vcc ; VI-GISEL-NEXT: s_setpc_b64 s[30:31] ; ; GFX900-SDAG-LABEL: v_exp10_f32_undef: @@ -5361,11 +5361,11 @@ define float @v_exp10_f32_undef() { ; GFX900-GISEL-NEXT: v_mov_b32_e32 v2, 0x7f800000 ; GFX900-GISEL-NEXT: v_ldexp_f32 v0, v0, v1 ; GFX900-GISEL-NEXT: v_mov_b32_e32 v1, 0xc23369f4 -; GFX900-GISEL-NEXT: v_cmp_lt_f32_e32 vcc, s4, v1 +; GFX900-GISEL-NEXT: 
v_cmp_ge_f32_e32 vcc, s4, v1 ; GFX900-GISEL-NEXT: v_mov_b32_e32 v1, 0x421a209b -; GFX900-GISEL-NEXT: v_cndmask_b32_e64 v0, v0, 0, vcc -; GFX900-GISEL-NEXT: v_cmp_gt_f32_e32 vcc, s4, v1 -; GFX900-GISEL-NEXT: v_cndmask_b32_e32 v0, v0, v2, vcc +; GFX900-GISEL-NEXT: v_cndmask_b32_e32 v0, 0, v0, vcc +; GFX900-GISEL-NEXT: v_cmp_le_f32_e32 vcc, s4, v1 +; GFX900-GISEL-NEXT: v_cndmask_b32_e32 v0, v2, v0, vcc ; GFX900-GISEL-NEXT: s_setpc_b64 s[30:31] ; ; SI-SDAG-LABEL: v_exp10_f32_undef: @@ -5400,11 +5400,11 @@ define float @v_exp10_f32_undef() { ; SI-GISEL-NEXT: v_mov_b32_e32 v2, 0x7f800000 ; SI-GISEL-NEXT: v_ldexp_f32_e32 v0, v0, v1 ; SI-GISEL-NEXT: v_mov_b32_e32 v1, 0xc23369f4 -; SI-GISEL-NEXT: v_cmp_lt_f32_e32 vcc, s4, v1 +; SI-GISEL-NEXT: v_cmp_ge_f32_e32 vcc, s4, v1 ; SI-GISEL-NEXT: v_mov_b32_e32 v1, 0x421a209b -; SI-GISEL-NEXT: v_cndmask_b32_e64 v0, v0, 0, vcc -; SI-GISEL-NEXT: v_cmp_gt_f32_e32 vcc, s4, v1 -; SI-GISEL-NEXT: v_cndmask_b32_e32 v0, v0, v2, vcc +; SI-GISEL-NEXT: v_cndmask_b32_e32 v0, 0, v0, vcc +; SI-GISEL-NEXT: v_cmp_le_f32_e32 vcc, s4, v1 +; SI-GISEL-NEXT: v_cndmask_b32_e32 v0, v2, v0, vcc ; SI-GISEL-NEXT: s_setpc_b64 s[30:31] ; ; R600-LABEL: v_exp10_f32_undef: @@ -5494,11 +5494,11 @@ define float @v_exp10_f32_from_fpext_f16(i16 %src.i) { ; VI-GISEL-NEXT: v_mov_b32_e32 v3, 0x7f800000 ; VI-GISEL-NEXT: v_ldexp_f32 v1, v1, v2 ; VI-GISEL-NEXT: v_mov_b32_e32 v2, 0xc23369f4 -; VI-GISEL-NEXT: v_cmp_lt_f32_e32 vcc, v0, v2 +; VI-GISEL-NEXT: v_cmp_ge_f32_e32 vcc, v0, v2 ; VI-GISEL-NEXT: v_mov_b32_e32 v2, 0x421a209b -; VI-GISEL-NEXT: v_cndmask_b32_e64 v1, v1, 0, vcc -; VI-GISEL-NEXT: v_cmp_gt_f32_e32 vcc, v0, v2 -; VI-GISEL-NEXT: v_cndmask_b32_e32 v0, v1, v3, vcc +; VI-GISEL-NEXT: v_cndmask_b32_e32 v1, 0, v1, vcc +; VI-GISEL-NEXT: v_cmp_le_f32_e32 vcc, v0, v2 +; VI-GISEL-NEXT: v_cndmask_b32_e32 v0, v3, v1, vcc ; VI-GISEL-NEXT: s_setpc_b64 s[30:31] ; ; GFX900-SDAG-LABEL: v_exp10_f32_from_fpext_f16: @@ -5540,13 +5540,13 @@ define float @v_exp10_f32_from_fpext_f16(i16 %src.i) { ; GFX900-GISEL-NEXT: v_cvt_i32_f32_e32 v2, v4 ; GFX900-GISEL-NEXT: v_exp_f32_e32 v1, v1 ; GFX900-GISEL-NEXT: v_mov_b32_e32 v3, 0xc23369f4 -; GFX900-GISEL-NEXT: v_cmp_lt_f32_e32 vcc, v0, v3 +; GFX900-GISEL-NEXT: v_cmp_ge_f32_e32 vcc, v0, v3 ; GFX900-GISEL-NEXT: v_mov_b32_e32 v3, 0x7f800000 ; GFX900-GISEL-NEXT: v_ldexp_f32 v1, v1, v2 ; GFX900-GISEL-NEXT: v_mov_b32_e32 v2, 0x421a209b -; GFX900-GISEL-NEXT: v_cndmask_b32_e64 v1, v1, 0, vcc -; GFX900-GISEL-NEXT: v_cmp_gt_f32_e32 vcc, v0, v2 -; GFX900-GISEL-NEXT: v_cndmask_b32_e32 v0, v1, v3, vcc +; GFX900-GISEL-NEXT: v_cndmask_b32_e32 v1, 0, v1, vcc +; GFX900-GISEL-NEXT: v_cmp_le_f32_e32 vcc, v0, v2 +; GFX900-GISEL-NEXT: v_cndmask_b32_e32 v0, v3, v1, vcc ; GFX900-GISEL-NEXT: s_setpc_b64 s[30:31] ; ; SI-SDAG-LABEL: v_exp10_f32_from_fpext_f16: @@ -5588,13 +5588,13 @@ define float @v_exp10_f32_from_fpext_f16(i16 %src.i) { ; SI-GISEL-NEXT: v_cvt_i32_f32_e32 v2, v4 ; SI-GISEL-NEXT: v_exp_f32_e32 v1, v1 ; SI-GISEL-NEXT: v_mov_b32_e32 v3, 0xc23369f4 -; SI-GISEL-NEXT: v_cmp_lt_f32_e32 vcc, v0, v3 +; SI-GISEL-NEXT: v_cmp_ge_f32_e32 vcc, v0, v3 ; SI-GISEL-NEXT: v_mov_b32_e32 v3, 0x7f800000 ; SI-GISEL-NEXT: v_ldexp_f32_e32 v1, v1, v2 ; SI-GISEL-NEXT: v_mov_b32_e32 v2, 0x421a209b -; SI-GISEL-NEXT: v_cndmask_b32_e64 v1, v1, 0, vcc -; SI-GISEL-NEXT: v_cmp_gt_f32_e32 vcc, v0, v2 -; SI-GISEL-NEXT: v_cndmask_b32_e32 v0, v1, v3, vcc +; SI-GISEL-NEXT: v_cndmask_b32_e32 v1, 0, v1, vcc +; SI-GISEL-NEXT: v_cmp_le_f32_e32 vcc, v0, v2 +; SI-GISEL-NEXT: v_cndmask_b32_e32 v0, v3, v1, vcc ; 
SI-GISEL-NEXT: s_setpc_b64 s[30:31] ; ; R600-LABEL: v_exp10_f32_from_fpext_f16: @@ -5662,11 +5662,11 @@ define float @v_exp10_f32_from_fpext_math_f16(i16 %src0.i, i16 %src1.i) { ; VI-GISEL-NEXT: v_mov_b32_e32 v3, 0x7f800000 ; VI-GISEL-NEXT: v_ldexp_f32 v1, v1, v2 ; VI-GISEL-NEXT: v_mov_b32_e32 v2, 0xc23369f4 -; VI-GISEL-NEXT: v_cmp_lt_f32_e32 vcc, v0, v2 +; VI-GISEL-NEXT: v_cmp_ge_f32_e32 vcc, v0, v2 ; VI-GISEL-NEXT: v_mov_b32_e32 v2, 0x421a209b -; VI-GISEL-NEXT: v_cndmask_b32_e64 v1, v1, 0, vcc -; VI-GISEL-NEXT: v_cmp_gt_f32_e32 vcc, v0, v2 -; VI-GISEL-NEXT: v_cndmask_b32_e32 v0, v1, v3, vcc +; VI-GISEL-NEXT: v_cndmask_b32_e32 v1, 0, v1, vcc +; VI-GISEL-NEXT: v_cmp_le_f32_e32 vcc, v0, v2 +; VI-GISEL-NEXT: v_cndmask_b32_e32 v0, v3, v1, vcc ; VI-GISEL-NEXT: s_setpc_b64 s[30:31] ; ; GFX900-SDAG-LABEL: v_exp10_f32_from_fpext_math_f16: @@ -5712,11 +5712,11 @@ define float @v_exp10_f32_from_fpext_math_f16(i16 %src0.i, i16 %src1.i) { ; GFX900-GISEL-NEXT: v_mov_b32_e32 v3, 0x7f800000 ; GFX900-GISEL-NEXT: v_ldexp_f32 v1, v1, v2 ; GFX900-GISEL-NEXT: v_mov_b32_e32 v2, 0xc23369f4 -; GFX900-GISEL-NEXT: v_cmp_lt_f32_e32 vcc, v0, v2 +; GFX900-GISEL-NEXT: v_cmp_ge_f32_e32 vcc, v0, v2 ; GFX900-GISEL-NEXT: v_mov_b32_e32 v2, 0x421a209b -; GFX900-GISEL-NEXT: v_cndmask_b32_e64 v1, v1, 0, vcc -; GFX900-GISEL-NEXT: v_cmp_gt_f32_e32 vcc, v0, v2 -; GFX900-GISEL-NEXT: v_cndmask_b32_e32 v0, v1, v3, vcc +; GFX900-GISEL-NEXT: v_cndmask_b32_e32 v1, 0, v1, vcc +; GFX900-GISEL-NEXT: v_cmp_le_f32_e32 vcc, v0, v2 +; GFX900-GISEL-NEXT: v_cndmask_b32_e32 v0, v3, v1, vcc ; GFX900-GISEL-NEXT: s_setpc_b64 s[30:31] ; ; SI-SDAG-LABEL: v_exp10_f32_from_fpext_math_f16: @@ -5766,11 +5766,11 @@ define float @v_exp10_f32_from_fpext_math_f16(i16 %src0.i, i16 %src1.i) { ; SI-GISEL-NEXT: v_exp_f32_e32 v1, v1 ; SI-GISEL-NEXT: v_mov_b32_e32 v2, 0xc23369f4 ; SI-GISEL-NEXT: v_mov_b32_e32 v4, 0x421a209b -; SI-GISEL-NEXT: v_cmp_lt_f32_e32 vcc, v0, v2 +; SI-GISEL-NEXT: v_cmp_ge_f32_e32 vcc, v0, v2 ; SI-GISEL-NEXT: v_ldexp_f32_e32 v1, v1, v3 -; SI-GISEL-NEXT: v_cndmask_b32_e64 v1, v1, 0, vcc -; SI-GISEL-NEXT: v_cmp_gt_f32_e32 vcc, v0, v4 -; SI-GISEL-NEXT: v_cndmask_b32_e32 v0, v1, v5, vcc +; SI-GISEL-NEXT: v_cndmask_b32_e32 v1, 0, v1, vcc +; SI-GISEL-NEXT: v_cmp_le_f32_e32 vcc, v0, v4 +; SI-GISEL-NEXT: v_cndmask_b32_e32 v0, v5, v1, vcc ; SI-GISEL-NEXT: s_setpc_b64 s[30:31] ; ; R600-LABEL: v_exp10_f32_from_fpext_math_f16: @@ -5998,11 +5998,11 @@ define float @v_exp10_f32_from_fpext_math_f16_daz(i16 %src0.i, i16 %src1.i) #0 { ; VI-GISEL-NEXT: v_mov_b32_e32 v3, 0x7f800000 ; VI-GISEL-NEXT: v_ldexp_f32 v1, v1, v2 ; VI-GISEL-NEXT: v_mov_b32_e32 v2, 0xc23369f4 -; VI-GISEL-NEXT: v_cmp_lt_f32_e32 vcc, v0, v2 +; VI-GISEL-NEXT: v_cmp_ge_f32_e32 vcc, v0, v2 ; VI-GISEL-NEXT: v_mov_b32_e32 v2, 0x421a209b -; VI-GISEL-NEXT: v_cndmask_b32_e64 v1, v1, 0, vcc -; VI-GISEL-NEXT: v_cmp_gt_f32_e32 vcc, v0, v2 -; VI-GISEL-NEXT: v_cndmask_b32_e32 v0, v1, v3, vcc +; VI-GISEL-NEXT: v_cndmask_b32_e32 v1, 0, v1, vcc +; VI-GISEL-NEXT: v_cmp_le_f32_e32 vcc, v0, v2 +; VI-GISEL-NEXT: v_cndmask_b32_e32 v0, v3, v1, vcc ; VI-GISEL-NEXT: s_setpc_b64 s[30:31] ; ; GFX900-SDAG-LABEL: v_exp10_f32_from_fpext_math_f16_daz: @@ -6048,11 +6048,11 @@ define float @v_exp10_f32_from_fpext_math_f16_daz(i16 %src0.i, i16 %src1.i) #0 { ; GFX900-GISEL-NEXT: v_mov_b32_e32 v3, 0x7f800000 ; GFX900-GISEL-NEXT: v_ldexp_f32 v1, v1, v2 ; GFX900-GISEL-NEXT: v_mov_b32_e32 v2, 0xc23369f4 -; GFX900-GISEL-NEXT: v_cmp_lt_f32_e32 vcc, v0, v2 +; GFX900-GISEL-NEXT: v_cmp_ge_f32_e32 vcc, v0, v2 ; 
GFX900-GISEL-NEXT: v_mov_b32_e32 v2, 0x421a209b -; GFX900-GISEL-NEXT: v_cndmask_b32_e64 v1, v1, 0, vcc -; GFX900-GISEL-NEXT: v_cmp_gt_f32_e32 vcc, v0, v2 -; GFX900-GISEL-NEXT: v_cndmask_b32_e32 v0, v1, v3, vcc +; GFX900-GISEL-NEXT: v_cndmask_b32_e32 v1, 0, v1, vcc +; GFX900-GISEL-NEXT: v_cmp_le_f32_e32 vcc, v0, v2 +; GFX900-GISEL-NEXT: v_cndmask_b32_e32 v0, v3, v1, vcc ; GFX900-GISEL-NEXT: s_setpc_b64 s[30:31] ; ; SI-SDAG-LABEL: v_exp10_f32_from_fpext_math_f16_daz: @@ -6102,11 +6102,11 @@ define float @v_exp10_f32_from_fpext_math_f16_daz(i16 %src0.i, i16 %src1.i) #0 { ; SI-GISEL-NEXT: v_exp_f32_e32 v1, v1 ; SI-GISEL-NEXT: v_mov_b32_e32 v2, 0xc23369f4 ; SI-GISEL-NEXT: v_mov_b32_e32 v4, 0x421a209b -; SI-GISEL-NEXT: v_cmp_lt_f32_e32 vcc, v0, v2 +; SI-GISEL-NEXT: v_cmp_ge_f32_e32 vcc, v0, v2 ; SI-GISEL-NEXT: v_ldexp_f32_e32 v1, v1, v3 -; SI-GISEL-NEXT: v_cndmask_b32_e64 v1, v1, 0, vcc -; SI-GISEL-NEXT: v_cmp_gt_f32_e32 vcc, v0, v4 -; SI-GISEL-NEXT: v_cndmask_b32_e32 v0, v1, v5, vcc +; SI-GISEL-NEXT: v_cndmask_b32_e32 v1, 0, v1, vcc +; SI-GISEL-NEXT: v_cmp_le_f32_e32 vcc, v0, v4 +; SI-GISEL-NEXT: v_cndmask_b32_e32 v0, v5, v1, vcc ; SI-GISEL-NEXT: s_setpc_b64 s[30:31] ; ; R600-LABEL: v_exp10_f32_from_fpext_math_f16_daz: @@ -7126,11 +7126,11 @@ define float @v_exp10_f32_contract(float %in) { ; VI-GISEL-NEXT: v_mov_b32_e32 v3, 0x7f800000 ; VI-GISEL-NEXT: v_ldexp_f32 v1, v1, v2 ; VI-GISEL-NEXT: v_mov_b32_e32 v2, 0xc23369f4 -; VI-GISEL-NEXT: v_cmp_lt_f32_e32 vcc, v0, v2 +; VI-GISEL-NEXT: v_cmp_ge_f32_e32 vcc, v0, v2 ; VI-GISEL-NEXT: v_mov_b32_e32 v2, 0x421a209b -; VI-GISEL-NEXT: v_cndmask_b32_e64 v1, v1, 0, vcc -; VI-GISEL-NEXT: v_cmp_gt_f32_e32 vcc, v0, v2 -; VI-GISEL-NEXT: v_cndmask_b32_e32 v0, v1, v3, vcc +; VI-GISEL-NEXT: v_cndmask_b32_e32 v1, 0, v1, vcc +; VI-GISEL-NEXT: v_cmp_le_f32_e32 vcc, v0, v2 +; VI-GISEL-NEXT: v_cndmask_b32_e32 v0, v3, v1, vcc ; VI-GISEL-NEXT: s_setpc_b64 s[30:31] ; ; GFX900-SDAG-LABEL: v_exp10_f32_contract: @@ -7172,11 +7172,11 @@ define float @v_exp10_f32_contract(float %in) { ; GFX900-GISEL-NEXT: v_mov_b32_e32 v3, 0x7f800000 ; GFX900-GISEL-NEXT: v_ldexp_f32 v1, v1, v2 ; GFX900-GISEL-NEXT: v_mov_b32_e32 v2, 0xc23369f4 -; GFX900-GISEL-NEXT: v_cmp_lt_f32_e32 vcc, v0, v2 +; GFX900-GISEL-NEXT: v_cmp_ge_f32_e32 vcc, v0, v2 ; GFX900-GISEL-NEXT: v_mov_b32_e32 v2, 0x421a209b -; GFX900-GISEL-NEXT: v_cndmask_b32_e64 v1, v1, 0, vcc -; GFX900-GISEL-NEXT: v_cmp_gt_f32_e32 vcc, v0, v2 -; GFX900-GISEL-NEXT: v_cndmask_b32_e32 v0, v1, v3, vcc +; GFX900-GISEL-NEXT: v_cndmask_b32_e32 v1, 0, v1, vcc +; GFX900-GISEL-NEXT: v_cmp_le_f32_e32 vcc, v0, v2 +; GFX900-GISEL-NEXT: v_cndmask_b32_e32 v0, v3, v1, vcc ; GFX900-GISEL-NEXT: s_setpc_b64 s[30:31] ; ; SI-SDAG-LABEL: v_exp10_f32_contract: @@ -7218,11 +7218,11 @@ define float @v_exp10_f32_contract(float %in) { ; SI-GISEL-NEXT: v_mov_b32_e32 v3, 0x7f800000 ; SI-GISEL-NEXT: v_ldexp_f32_e32 v1, v1, v2 ; SI-GISEL-NEXT: v_mov_b32_e32 v2, 0xc23369f4 -; SI-GISEL-NEXT: v_cmp_lt_f32_e32 vcc, v0, v2 +; SI-GISEL-NEXT: v_cmp_ge_f32_e32 vcc, v0, v2 ; SI-GISEL-NEXT: v_mov_b32_e32 v2, 0x421a209b -; SI-GISEL-NEXT: v_cndmask_b32_e64 v1, v1, 0, vcc -; SI-GISEL-NEXT: v_cmp_gt_f32_e32 vcc, v0, v2 -; SI-GISEL-NEXT: v_cndmask_b32_e32 v0, v1, v3, vcc +; SI-GISEL-NEXT: v_cndmask_b32_e32 v1, 0, v1, vcc +; SI-GISEL-NEXT: v_cmp_le_f32_e32 vcc, v0, v2 +; SI-GISEL-NEXT: v_cndmask_b32_e32 v0, v3, v1, vcc ; SI-GISEL-NEXT: s_setpc_b64 s[30:31] ; ; R600-LABEL: v_exp10_f32_contract: @@ -7284,11 +7284,11 @@ define float @v_exp10_f32_contract_daz(float %in) #0 { ; 
VI-GISEL-NEXT: v_mov_b32_e32 v3, 0x7f800000 ; VI-GISEL-NEXT: v_ldexp_f32 v1, v1, v2 ; VI-GISEL-NEXT: v_mov_b32_e32 v2, 0xc23369f4 -; VI-GISEL-NEXT: v_cmp_lt_f32_e32 vcc, v0, v2 +; VI-GISEL-NEXT: v_cmp_ge_f32_e32 vcc, v0, v2 ; VI-GISEL-NEXT: v_mov_b32_e32 v2, 0x421a209b -; VI-GISEL-NEXT: v_cndmask_b32_e64 v1, v1, 0, vcc -; VI-GISEL-NEXT: v_cmp_gt_f32_e32 vcc, v0, v2 -; VI-GISEL-NEXT: v_cndmask_b32_e32 v0, v1, v3, vcc +; VI-GISEL-NEXT: v_cndmask_b32_e32 v1, 0, v1, vcc +; VI-GISEL-NEXT: v_cmp_le_f32_e32 vcc, v0, v2 +; VI-GISEL-NEXT: v_cndmask_b32_e32 v0, v3, v1, vcc ; VI-GISEL-NEXT: s_setpc_b64 s[30:31] ; ; GFX900-SDAG-LABEL: v_exp10_f32_contract_daz: @@ -7330,11 +7330,11 @@ define float @v_exp10_f32_contract_daz(float %in) #0 { ; GFX900-GISEL-NEXT: v_mov_b32_e32 v3, 0x7f800000 ; GFX900-GISEL-NEXT: v_ldexp_f32 v1, v1, v2 ; GFX900-GISEL-NEXT: v_mov_b32_e32 v2, 0xc23369f4 -; GFX900-GISEL-NEXT: v_cmp_lt_f32_e32 vcc, v0, v2 +; GFX900-GISEL-NEXT: v_cmp_ge_f32_e32 vcc, v0, v2 ; GFX900-GISEL-NEXT: v_mov_b32_e32 v2, 0x421a209b -; GFX900-GISEL-NEXT: v_cndmask_b32_e64 v1, v1, 0, vcc -; GFX900-GISEL-NEXT: v_cmp_gt_f32_e32 vcc, v0, v2 -; GFX900-GISEL-NEXT: v_cndmask_b32_e32 v0, v1, v3, vcc +; GFX900-GISEL-NEXT: v_cndmask_b32_e32 v1, 0, v1, vcc +; GFX900-GISEL-NEXT: v_cmp_le_f32_e32 vcc, v0, v2 +; GFX900-GISEL-NEXT: v_cndmask_b32_e32 v0, v3, v1, vcc ; GFX900-GISEL-NEXT: s_setpc_b64 s[30:31] ; ; SI-SDAG-LABEL: v_exp10_f32_contract_daz: @@ -7376,11 +7376,11 @@ define float @v_exp10_f32_contract_daz(float %in) #0 { ; SI-GISEL-NEXT: v_mov_b32_e32 v3, 0x7f800000 ; SI-GISEL-NEXT: v_ldexp_f32_e32 v1, v1, v2 ; SI-GISEL-NEXT: v_mov_b32_e32 v2, 0xc23369f4 -; SI-GISEL-NEXT: v_cmp_lt_f32_e32 vcc, v0, v2 +; SI-GISEL-NEXT: v_cmp_ge_f32_e32 vcc, v0, v2 ; SI-GISEL-NEXT: v_mov_b32_e32 v2, 0x421a209b -; SI-GISEL-NEXT: v_cndmask_b32_e64 v1, v1, 0, vcc -; SI-GISEL-NEXT: v_cmp_gt_f32_e32 vcc, v0, v2 -; SI-GISEL-NEXT: v_cndmask_b32_e32 v0, v1, v3, vcc +; SI-GISEL-NEXT: v_cndmask_b32_e32 v1, 0, v1, vcc +; SI-GISEL-NEXT: v_cmp_le_f32_e32 vcc, v0, v2 +; SI-GISEL-NEXT: v_cndmask_b32_e32 v0, v3, v1, vcc ; SI-GISEL-NEXT: s_setpc_b64 s[30:31] ; ; R600-LABEL: v_exp10_f32_contract_daz: @@ -7437,8 +7437,8 @@ define float @v_exp10_f32_contract_nnan_ninf(float %in) { ; VI-GISEL-NEXT: v_exp_f32_e32 v1, v1 ; VI-GISEL-NEXT: v_ldexp_f32 v1, v1, v2 ; VI-GISEL-NEXT: v_mov_b32_e32 v2, 0xc23369f4 -; VI-GISEL-NEXT: v_cmp_lt_f32_e32 vcc, v0, v2 -; VI-GISEL-NEXT: v_cndmask_b32_e64 v0, v1, 0, vcc +; VI-GISEL-NEXT: v_cmp_ge_f32_e32 vcc, v0, v2 +; VI-GISEL-NEXT: v_cndmask_b32_e32 v0, 0, v1, vcc ; VI-GISEL-NEXT: s_setpc_b64 s[30:31] ; ; GFX900-SDAG-LABEL: v_exp10_f32_contract_nnan_ninf: @@ -7475,8 +7475,8 @@ define float @v_exp10_f32_contract_nnan_ninf(float %in) { ; GFX900-GISEL-NEXT: v_exp_f32_e32 v1, v1 ; GFX900-GISEL-NEXT: v_ldexp_f32 v1, v1, v2 ; GFX900-GISEL-NEXT: v_mov_b32_e32 v2, 0xc23369f4 -; GFX900-GISEL-NEXT: v_cmp_lt_f32_e32 vcc, v0, v2 -; GFX900-GISEL-NEXT: v_cndmask_b32_e64 v0, v1, 0, vcc +; GFX900-GISEL-NEXT: v_cmp_ge_f32_e32 vcc, v0, v2 +; GFX900-GISEL-NEXT: v_cndmask_b32_e32 v0, 0, v1, vcc ; GFX900-GISEL-NEXT: s_setpc_b64 s[30:31] ; ; SI-SDAG-LABEL: v_exp10_f32_contract_nnan_ninf: @@ -7513,8 +7513,8 @@ define float @v_exp10_f32_contract_nnan_ninf(float %in) { ; SI-GISEL-NEXT: v_exp_f32_e32 v1, v1 ; SI-GISEL-NEXT: v_ldexp_f32_e32 v1, v1, v2 ; SI-GISEL-NEXT: v_mov_b32_e32 v2, 0xc23369f4 -; SI-GISEL-NEXT: v_cmp_lt_f32_e32 vcc, v0, v2 -; SI-GISEL-NEXT: v_cndmask_b32_e64 v0, v1, 0, vcc +; SI-GISEL-NEXT: v_cmp_ge_f32_e32 vcc, v0, v2 +; 
SI-GISEL-NEXT: v_cndmask_b32_e32 v0, 0, v1, vcc ; SI-GISEL-NEXT: s_setpc_b64 s[30:31] ; ; R600-LABEL: v_exp10_f32_contract_nnan_ninf:
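; NOTE: reviewer-oriented sketch, not part of the autogenerated checks. Every
; GISEL hunk above is the same rewrite; the operands below are copied from the
; first VI-GISEL hunk of v_exp10_f32:
;
;   before:  v_cmp_lt_f32_e32  vcc, v0, v2
;            v_cndmask_b32_e64 v1, v1, 0, vcc   ; VOP3 (_e64) needed: the
;                                               ; inline constant 0 sits in src1
;   after:   v_cmp_ge_f32_e32  vcc, v0, v2      ; inverted compare
;            v_cndmask_b32_e32 v1, 0, v1, vcc   ; operands swapped: 0 now in
;                                               ; src0, so VOP2 (_e32) is legal
;
; Inverting the compare lets each dependent select swap its operands, moving
; the inline constant into src0 where the shorter VOP2 encoding accepts it; in
; the fabs/fneg variants this also frees the s[4:5] pair, since the inverted
; compare can write vcc directly.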