diff --git a/llvm/include/llvm/CodeGen/GlobalISel/CombinerHelper.h b/llvm/include/llvm/CodeGen/GlobalISel/CombinerHelper.h index 871456d2a55b5..94e36e412b0cf 100644 --- a/llvm/include/llvm/CodeGen/GlobalISel/CombinerHelper.h +++ b/llvm/include/llvm/CodeGen/GlobalISel/CombinerHelper.h @@ -864,6 +864,9 @@ class CombinerHelper { /// Combine select to integer min/max. bool matchSelectIMinMax(const MachineOperand &MO, BuildFnTy &MatchInfo) const; + /// Transform (neg (min/max x, (neg x))) into (max/min x, (neg x)). + bool matchSimplifyNegMinMax(MachineInstr &MI, BuildFnTy &MatchInfo) const; + /// Combine selects. bool matchSelect(MachineInstr &MI, BuildFnTy &MatchInfo) const; diff --git a/llvm/include/llvm/CodeGen/GlobalISel/Utils.h b/llvm/include/llvm/CodeGen/GlobalISel/Utils.h index cb5a4c14b364c..a35ecae5d18bf 100644 --- a/llvm/include/llvm/CodeGen/GlobalISel/Utils.h +++ b/llvm/include/llvm/CodeGen/GlobalISel/Utils.h @@ -171,6 +171,10 @@ void reportGISelWarning(MachineFunction &MF, const TargetPassConfig &TPC, MachineOptimizationRemarkEmitter &MORE, MachineOptimizationRemarkMissed &R); +/// Returns the inverse opcode of \p MinMaxOpc, which is a generic min/max +/// opcode like G_SMIN. +unsigned getInverseGMinMaxOpcode(unsigned MinMaxOpc); + /// If \p VReg is defined by a G_CONSTANT, return the corresponding value. 
std::optional<APInt> getIConstantVRegVal(Register VReg, const MachineRegisterInfo &MRI); diff --git a/llvm/include/llvm/Target/GlobalISel/Combine.td b/llvm/include/llvm/Target/GlobalISel/Combine.td index 013c3a6ed83d8..8641eabbdd84c 100644 --- a/llvm/include/llvm/Target/GlobalISel/Combine.td +++ b/llvm/include/llvm/Target/GlobalISel/Combine.td @@ -1372,6 +1372,12 @@ def select_to_iminmax: GICombineRule< [{ return Helper.matchSelectIMinMax(${root}, ${info}); }]), (apply [{ Helper.applyBuildFnMO(${root}, ${info}); }])>; +def simplify_neg_minmax : GICombineRule< + (defs root:$root, build_fn_matchinfo:$matchinfo), + (match (wip_match_opcode G_SUB):$root, + [{ return Helper.matchSimplifyNegMinMax(*${root}, ${matchinfo}); }]), + (apply [{ Helper.applyBuildFn(*${root}, ${matchinfo}); }])>; + def match_selects : GICombineRule< (defs root:$root, build_fn_matchinfo:$matchinfo), (match (wip_match_opcode G_SELECT):$root, @@ -2008,7 +2014,7 @@ def all_combines : GICombineGroup<[integer_reassoc_combines, trivial_combines, and_or_disjoint_mask, fma_combines, fold_binop_into_select, sub_add_reg, select_to_minmax, fsub_to_fneg, commute_constant_to_rhs, match_ands, match_ors, - combine_concat_vector, + simplify_neg_minmax, combine_concat_vector, sext_trunc, zext_trunc, prefer_sign_combines, shuffle_combines, combine_use_vector_truncate, merge_combines, overflow_combines]>; diff --git a/llvm/lib/CodeGen/GlobalISel/CombinerHelper.cpp b/llvm/lib/CodeGen/GlobalISel/CombinerHelper.cpp index c061c01d3c1b1..4e3aaf5da7198 100644 --- a/llvm/lib/CodeGen/GlobalISel/CombinerHelper.cpp +++ b/llvm/lib/CodeGen/GlobalISel/CombinerHelper.cpp @@ -7062,6 +7062,40 @@ bool CombinerHelper::matchSelectIMinMax(const MachineOperand &MO, } } +// Fold a negated signed/unsigned min/max of x and (neg x) into the inverse +// min/max of the same operands: +//   (neg (min/max x, (neg x))) --> (max/min x, (neg x)) +// Only matches when the inner min/max has a single use and the inverse opcode +// is legal for the destination type. +bool CombinerHelper::matchSimplifyNegMinMax(MachineInstr &MI, + BuildFnTy &MatchInfo) const { + assert(MI.getOpcode() == TargetOpcode::G_SUB && "Expected a G_SUB"); + Register DestReg = MI.getOperand(0).getReg(); + LLT DestTy = 
MRI.getType(DestReg); + + Register X; + Register Sub0; + // Sub0 binds the register holding (neg x) so the rewrite can reuse it. + auto NegPattern = m_all_of(m_Neg(m_DeferredReg(X)), m_Reg(Sub0)); + if (mi_match(DestReg, MRI, + m_Neg(m_OneUse(m_any_of(m_GSMin(m_Reg(X), NegPattern), + m_GSMax(m_Reg(X), NegPattern), + m_GUMin(m_Reg(X), NegPattern), + m_GUMax(m_Reg(X), NegPattern)))))) { + // Operand 2 of the G_SUB is the min/max being negated. + MachineInstr *MinMaxMI = MRI.getVRegDef(MI.getOperand(2).getReg()); + unsigned NewOpc = getInverseGMinMaxOpcode(MinMaxMI->getOpcode()); + if (isLegal({NewOpc, {DestTy}})) { + MatchInfo = [=](MachineIRBuilder &B) { + B.buildInstr(NewOpc, {DestReg}, {X, Sub0}); + }; + return true; + } + } + + return false; +} + bool CombinerHelper::matchSelect(MachineInstr &MI, BuildFnTy &MatchInfo) const { GSelect *Select = cast<GSelect>(&MI); diff --git a/llvm/lib/CodeGen/GlobalISel/Utils.cpp b/llvm/lib/CodeGen/GlobalISel/Utils.cpp index 79382933a1f42..625d556e3ff5e 100644 --- a/llvm/lib/CodeGen/GlobalISel/Utils.cpp +++ b/llvm/lib/CodeGen/GlobalISel/Utils.cpp @@ -276,6 +276,21 @@ void llvm::reportGISelFailure(MachineFunction &MF, const TargetPassConfig &TPC, reportGISelFailure(MF, TPC, MORE, R); } +unsigned llvm::getInverseGMinMaxOpcode(unsigned MinMaxOpc) { + switch (MinMaxOpc) { + case TargetOpcode::G_SMIN: + return TargetOpcode::G_SMAX; + case TargetOpcode::G_SMAX: + return TargetOpcode::G_SMIN; + case TargetOpcode::G_UMIN: + return TargetOpcode::G_UMAX; + case TargetOpcode::G_UMAX: + return TargetOpcode::G_UMIN; + default: + llvm_unreachable("unrecognized opcode"); + } +} + std::optional<APInt> llvm::getIConstantVRegVal(Register VReg, const MachineRegisterInfo &MRI) { std::optional<ValueAndVReg> ValAndVReg = getIConstantVRegValWithLookThrough( diff --git a/llvm/lib/Target/RISCV/RISCVCombine.td b/llvm/lib/Target/RISCV/RISCVCombine.td index 030613a7d8904..995dd0c5d82eb 100644 --- a/llvm/lib/Target/RISCV/RISCVCombine.td +++ b/llvm/lib/Target/RISCV/RISCVCombine.td @@ -25,5 +25,5 @@ def RISCVPostLegalizerCombiner : GICombiner<"RISCVPostLegalizerCombinerImpl", [sub_to_add, combines_for_extload, 
redundant_and, identity_combines, shift_immed_chain, - commute_constant_to_rhs]> { + commute_constant_to_rhs, simplify_neg_minmax]> { } diff --git a/llvm/test/CodeGen/RISCV/GlobalISel/combine-neg-abs.ll b/llvm/test/CodeGen/RISCV/GlobalISel/combine-neg-abs.ll new file mode 100644 index 0000000000000..6c848ecf0fffd --- /dev/null +++ b/llvm/test/CodeGen/RISCV/GlobalISel/combine-neg-abs.ll @@ -0,0 +1,457 @@ +; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py UTC_ARGS: --version 5 +; RUN: llc -mtriple=riscv32 -global-isel -verify-machineinstrs < %s \ +; RUN: | FileCheck %s --check-prefixes=RV32I +; RUN: llc -mtriple=riscv32 -global-isel -mattr=+zbb -verify-machineinstrs < %s \ +; RUN: | FileCheck %s --check-prefixes=RV32ZBB +; RUN: llc -mtriple=riscv64 -global-isel -verify-machineinstrs < %s \ +; RUN: | FileCheck %s --check-prefixes=RV64I +; RUN: llc -mtriple=riscv64 -global-isel -mattr=+zbb -verify-machineinstrs < %s \ +; RUN: | FileCheck %s --check-prefixes=RV64ZBB + +define i32 @expanded_neg_abs32(i32 %x) { +; RV32I-LABEL: expanded_neg_abs32: +; RV32I: # %bb.0: +; RV32I-NEXT: neg a1, a0 +; RV32I-NEXT: blt a0, a1, .LBB0_2 +; RV32I-NEXT: # %bb.1: +; RV32I-NEXT: mv a1, a0 +; RV32I-NEXT: .LBB0_2: +; RV32I-NEXT: neg a0, a1 +; RV32I-NEXT: ret +; +; RV32ZBB-LABEL: expanded_neg_abs32: +; RV32ZBB: # %bb.0: +; RV32ZBB-NEXT: neg a1, a0 +; RV32ZBB-NEXT: min a0, a0, a1 +; RV32ZBB-NEXT: ret +; +; RV64I-LABEL: expanded_neg_abs32: +; RV64I: # %bb.0: +; RV64I-NEXT: neg a1, a0 +; RV64I-NEXT: sext.w a2, a1 +; RV64I-NEXT: sext.w a3, a0 +; RV64I-NEXT: blt a3, a2, .LBB0_2 +; RV64I-NEXT: # %bb.1: +; RV64I-NEXT: mv a1, a0 +; RV64I-NEXT: .LBB0_2: +; RV64I-NEXT: neg a0, a1 +; RV64I-NEXT: ret +; +; RV64ZBB-LABEL: expanded_neg_abs32: +; RV64ZBB: # %bb.0: +; RV64ZBB-NEXT: negw a1, a0 +; RV64ZBB-NEXT: sext.w a0, a0 +; RV64ZBB-NEXT: max a0, a1, a0 +; RV64ZBB-NEXT: neg a0, a0 +; RV64ZBB-NEXT: ret + %n = sub i32 0, %x + %t = call i32 @llvm.smax.i32(i32 %n, i32 %x) + %r = 
sub i32 0, %t + ret i32 %r +} + +define i32 @expanded_neg_abs32_unsigned(i32 %x) { +; RV32I-LABEL: expanded_neg_abs32_unsigned: +; RV32I: # %bb.0: +; RV32I-NEXT: neg a1, a0 +; RV32I-NEXT: bltu a0, a1, .LBB1_2 +; RV32I-NEXT: # %bb.1: +; RV32I-NEXT: mv a1, a0 +; RV32I-NEXT: .LBB1_2: +; RV32I-NEXT: neg a0, a1 +; RV32I-NEXT: ret +; +; RV32ZBB-LABEL: expanded_neg_abs32_unsigned: +; RV32ZBB: # %bb.0: +; RV32ZBB-NEXT: neg a1, a0 +; RV32ZBB-NEXT: minu a0, a0, a1 +; RV32ZBB-NEXT: ret +; +; RV64I-LABEL: expanded_neg_abs32_unsigned: +; RV64I: # %bb.0: +; RV64I-NEXT: neg a1, a0 +; RV64I-NEXT: sext.w a2, a1 +; RV64I-NEXT: sext.w a3, a0 +; RV64I-NEXT: bltu a3, a2, .LBB1_2 +; RV64I-NEXT: # %bb.1: +; RV64I-NEXT: mv a1, a0 +; RV64I-NEXT: .LBB1_2: +; RV64I-NEXT: neg a0, a1 +; RV64I-NEXT: ret +; +; RV64ZBB-LABEL: expanded_neg_abs32_unsigned: +; RV64ZBB: # %bb.0: +; RV64ZBB-NEXT: negw a1, a0 +; RV64ZBB-NEXT: sext.w a0, a0 +; RV64ZBB-NEXT: maxu a0, a1, a0 +; RV64ZBB-NEXT: neg a0, a0 +; RV64ZBB-NEXT: ret + %n = sub i32 0, %x + %t = call i32 @llvm.umax.i32(i32 %n, i32 %x) + %r = sub i32 0, %t + ret i32 %r +} + +define i64 @expanded_neg_abs64(i64 %x) { +; RV32I-LABEL: expanded_neg_abs64: +; RV32I: # %bb.0: +; RV32I-NEXT: snez a2, a0 +; RV32I-NEXT: neg a3, a1 +; RV32I-NEXT: sub a2, a3, a2 +; RV32I-NEXT: neg a3, a0 +; RV32I-NEXT: beq a2, a1, .LBB2_2 +; RV32I-NEXT: # %bb.1: +; RV32I-NEXT: slt a4, a1, a2 +; RV32I-NEXT: beqz a4, .LBB2_3 +; RV32I-NEXT: j .LBB2_4 +; RV32I-NEXT: .LBB2_2: +; RV32I-NEXT: sltu a4, a0, a3 +; RV32I-NEXT: bnez a4, .LBB2_4 +; RV32I-NEXT: .LBB2_3: +; RV32I-NEXT: mv a3, a0 +; RV32I-NEXT: mv a2, a1 +; RV32I-NEXT: .LBB2_4: +; RV32I-NEXT: neg a0, a3 +; RV32I-NEXT: snez a1, a3 +; RV32I-NEXT: neg a2, a2 +; RV32I-NEXT: sub a1, a2, a1 +; RV32I-NEXT: ret +; +; RV32ZBB-LABEL: expanded_neg_abs64: +; RV32ZBB: # %bb.0: +; RV32ZBB-NEXT: snez a2, a0 +; RV32ZBB-NEXT: neg a3, a1 +; RV32ZBB-NEXT: sub a2, a3, a2 +; RV32ZBB-NEXT: neg a3, a0 +; RV32ZBB-NEXT: beq a2, a1, .LBB2_2 +; 
RV32ZBB-NEXT: # %bb.1: +; RV32ZBB-NEXT: slt a4, a1, a2 +; RV32ZBB-NEXT: beqz a4, .LBB2_3 +; RV32ZBB-NEXT: j .LBB2_4 +; RV32ZBB-NEXT: .LBB2_2: +; RV32ZBB-NEXT: sltu a4, a0, a3 +; RV32ZBB-NEXT: bnez a4, .LBB2_4 +; RV32ZBB-NEXT: .LBB2_3: +; RV32ZBB-NEXT: mv a3, a0 +; RV32ZBB-NEXT: mv a2, a1 +; RV32ZBB-NEXT: .LBB2_4: +; RV32ZBB-NEXT: neg a0, a3 +; RV32ZBB-NEXT: snez a1, a3 +; RV32ZBB-NEXT: neg a2, a2 +; RV32ZBB-NEXT: sub a1, a2, a1 +; RV32ZBB-NEXT: ret +; +; RV64I-LABEL: expanded_neg_abs64: +; RV64I: # %bb.0: +; RV64I-NEXT: neg a1, a0 +; RV64I-NEXT: blt a0, a1, .LBB2_2 +; RV64I-NEXT: # %bb.1: +; RV64I-NEXT: mv a1, a0 +; RV64I-NEXT: .LBB2_2: +; RV64I-NEXT: neg a0, a1 +; RV64I-NEXT: ret +; +; RV64ZBB-LABEL: expanded_neg_abs64: +; RV64ZBB: # %bb.0: +; RV64ZBB-NEXT: neg a1, a0 +; RV64ZBB-NEXT: min a0, a0, a1 +; RV64ZBB-NEXT: ret + %n = sub i64 0, %x + %t = call i64 @llvm.smax.i64(i64 %n, i64 %x) + %r = sub i64 0, %t + ret i64 %r +} + +define i64 @expanded_neg_abs64_unsigned(i64 %x) { +; RV32I-LABEL: expanded_neg_abs64_unsigned: +; RV32I: # %bb.0: +; RV32I-NEXT: snez a2, a0 +; RV32I-NEXT: neg a3, a1 +; RV32I-NEXT: sub a2, a3, a2 +; RV32I-NEXT: neg a3, a0 +; RV32I-NEXT: beq a2, a1, .LBB3_2 +; RV32I-NEXT: # %bb.1: +; RV32I-NEXT: sltu a4, a1, a2 +; RV32I-NEXT: beqz a4, .LBB3_3 +; RV32I-NEXT: j .LBB3_4 +; RV32I-NEXT: .LBB3_2: +; RV32I-NEXT: sltu a4, a0, a3 +; RV32I-NEXT: bnez a4, .LBB3_4 +; RV32I-NEXT: .LBB3_3: +; RV32I-NEXT: mv a3, a0 +; RV32I-NEXT: mv a2, a1 +; RV32I-NEXT: .LBB3_4: +; RV32I-NEXT: neg a0, a3 +; RV32I-NEXT: snez a1, a3 +; RV32I-NEXT: neg a2, a2 +; RV32I-NEXT: sub a1, a2, a1 +; RV32I-NEXT: ret +; +; RV32ZBB-LABEL: expanded_neg_abs64_unsigned: +; RV32ZBB: # %bb.0: +; RV32ZBB-NEXT: snez a2, a0 +; RV32ZBB-NEXT: neg a3, a1 +; RV32ZBB-NEXT: sub a2, a3, a2 +; RV32ZBB-NEXT: neg a3, a0 +; RV32ZBB-NEXT: beq a2, a1, .LBB3_2 +; RV32ZBB-NEXT: # %bb.1: +; RV32ZBB-NEXT: sltu a4, a1, a2 +; RV32ZBB-NEXT: beqz a4, .LBB3_3 +; RV32ZBB-NEXT: j .LBB3_4 +; RV32ZBB-NEXT: .LBB3_2: +; 
RV32ZBB-NEXT: sltu a4, a0, a3 +; RV32ZBB-NEXT: bnez a4, .LBB3_4 +; RV32ZBB-NEXT: .LBB3_3: +; RV32ZBB-NEXT: mv a3, a0 +; RV32ZBB-NEXT: mv a2, a1 +; RV32ZBB-NEXT: .LBB3_4: +; RV32ZBB-NEXT: neg a0, a3 +; RV32ZBB-NEXT: snez a1, a3 +; RV32ZBB-NEXT: neg a2, a2 +; RV32ZBB-NEXT: sub a1, a2, a1 +; RV32ZBB-NEXT: ret +; +; RV64I-LABEL: expanded_neg_abs64_unsigned: +; RV64I: # %bb.0: +; RV64I-NEXT: neg a1, a0 +; RV64I-NEXT: bltu a0, a1, .LBB3_2 +; RV64I-NEXT: # %bb.1: +; RV64I-NEXT: mv a1, a0 +; RV64I-NEXT: .LBB3_2: +; RV64I-NEXT: neg a0, a1 +; RV64I-NEXT: ret +; +; RV64ZBB-LABEL: expanded_neg_abs64_unsigned: +; RV64ZBB: # %bb.0: +; RV64ZBB-NEXT: neg a1, a0 +; RV64ZBB-NEXT: minu a0, a0, a1 +; RV64ZBB-NEXT: ret + %n = sub i64 0, %x + %t = call i64 @llvm.umax.i64(i64 %n, i64 %x) + %r = sub i64 0, %t + ret i64 %r +} + +define i32 @expanded_neg_inv_abs32(i32 %x) { +; RV32I-LABEL: expanded_neg_inv_abs32: +; RV32I: # %bb.0: +; RV32I-NEXT: neg a1, a0 +; RV32I-NEXT: blt a1, a0, .LBB4_2 +; RV32I-NEXT: # %bb.1: +; RV32I-NEXT: mv a1, a0 +; RV32I-NEXT: .LBB4_2: +; RV32I-NEXT: neg a0, a1 +; RV32I-NEXT: ret +; +; RV32ZBB-LABEL: expanded_neg_inv_abs32: +; RV32ZBB: # %bb.0: +; RV32ZBB-NEXT: neg a1, a0 +; RV32ZBB-NEXT: max a0, a0, a1 +; RV32ZBB-NEXT: ret +; +; RV64I-LABEL: expanded_neg_inv_abs32: +; RV64I: # %bb.0: +; RV64I-NEXT: neg a1, a0 +; RV64I-NEXT: sext.w a2, a1 +; RV64I-NEXT: sext.w a3, a0 +; RV64I-NEXT: blt a2, a3, .LBB4_2 +; RV64I-NEXT: # %bb.1: +; RV64I-NEXT: mv a1, a0 +; RV64I-NEXT: .LBB4_2: +; RV64I-NEXT: neg a0, a1 +; RV64I-NEXT: ret +; +; RV64ZBB-LABEL: expanded_neg_inv_abs32: +; RV64ZBB: # %bb.0: +; RV64ZBB-NEXT: negw a1, a0 +; RV64ZBB-NEXT: sext.w a0, a0 +; RV64ZBB-NEXT: min a0, a1, a0 +; RV64ZBB-NEXT: neg a0, a0 +; RV64ZBB-NEXT: ret + %n = sub i32 0, %x + %t = call i32 @llvm.smin.i32(i32 %n, i32 %x) + %r = sub i32 0, %t + ret i32 %r +} + +define i32 @expanded_neg_inv_abs32_unsigned(i32 %x) { +; RV32I-LABEL: expanded_neg_inv_abs32_unsigned: +; RV32I: # %bb.0: +; RV32I-NEXT: 
neg a1, a0 +; RV32I-NEXT: bltu a1, a0, .LBB5_2 +; RV32I-NEXT: # %bb.1: +; RV32I-NEXT: mv a1, a0 +; RV32I-NEXT: .LBB5_2: +; RV32I-NEXT: neg a0, a1 +; RV32I-NEXT: ret +; +; RV32ZBB-LABEL: expanded_neg_inv_abs32_unsigned: +; RV32ZBB: # %bb.0: +; RV32ZBB-NEXT: neg a1, a0 +; RV32ZBB-NEXT: maxu a0, a0, a1 +; RV32ZBB-NEXT: ret +; +; RV64I-LABEL: expanded_neg_inv_abs32_unsigned: +; RV64I: # %bb.0: +; RV64I-NEXT: neg a1, a0 +; RV64I-NEXT: sext.w a2, a1 +; RV64I-NEXT: sext.w a3, a0 +; RV64I-NEXT: bltu a2, a3, .LBB5_2 +; RV64I-NEXT: # %bb.1: +; RV64I-NEXT: mv a1, a0 +; RV64I-NEXT: .LBB5_2: +; RV64I-NEXT: neg a0, a1 +; RV64I-NEXT: ret +; +; RV64ZBB-LABEL: expanded_neg_inv_abs32_unsigned: +; RV64ZBB: # %bb.0: +; RV64ZBB-NEXT: negw a1, a0 +; RV64ZBB-NEXT: sext.w a0, a0 +; RV64ZBB-NEXT: minu a0, a1, a0 +; RV64ZBB-NEXT: neg a0, a0 +; RV64ZBB-NEXT: ret + %n = sub i32 0, %x + %t = call i32 @llvm.umin.i32(i32 %n, i32 %x) + %r = sub i32 0, %t + ret i32 %r +} + +define i64 @expanded_neg_inv_abs64(i64 %x) { +; RV32I-LABEL: expanded_neg_inv_abs64: +; RV32I: # %bb.0: +; RV32I-NEXT: snez a2, a0 +; RV32I-NEXT: neg a3, a1 +; RV32I-NEXT: sub a2, a3, a2 +; RV32I-NEXT: neg a3, a0 +; RV32I-NEXT: beq a2, a1, .LBB6_2 +; RV32I-NEXT: # %bb.1: +; RV32I-NEXT: slt a4, a2, a1 +; RV32I-NEXT: beqz a4, .LBB6_3 +; RV32I-NEXT: j .LBB6_4 +; RV32I-NEXT: .LBB6_2: +; RV32I-NEXT: sltu a4, a3, a0 +; RV32I-NEXT: bnez a4, .LBB6_4 +; RV32I-NEXT: .LBB6_3: +; RV32I-NEXT: mv a3, a0 +; RV32I-NEXT: mv a2, a1 +; RV32I-NEXT: .LBB6_4: +; RV32I-NEXT: neg a0, a3 +; RV32I-NEXT: snez a1, a3 +; RV32I-NEXT: neg a2, a2 +; RV32I-NEXT: sub a1, a2, a1 +; RV32I-NEXT: ret +; +; RV32ZBB-LABEL: expanded_neg_inv_abs64: +; RV32ZBB: # %bb.0: +; RV32ZBB-NEXT: snez a2, a0 +; RV32ZBB-NEXT: neg a3, a1 +; RV32ZBB-NEXT: sub a2, a3, a2 +; RV32ZBB-NEXT: neg a3, a0 +; RV32ZBB-NEXT: beq a2, a1, .LBB6_2 +; RV32ZBB-NEXT: # %bb.1: +; RV32ZBB-NEXT: slt a4, a2, a1 +; RV32ZBB-NEXT: beqz a4, .LBB6_3 +; RV32ZBB-NEXT: j .LBB6_4 +; RV32ZBB-NEXT: .LBB6_2: +; 
RV32ZBB-NEXT: sltu a4, a3, a0 +; RV32ZBB-NEXT: bnez a4, .LBB6_4 +; RV32ZBB-NEXT: .LBB6_3: +; RV32ZBB-NEXT: mv a3, a0 +; RV32ZBB-NEXT: mv a2, a1 +; RV32ZBB-NEXT: .LBB6_4: +; RV32ZBB-NEXT: neg a0, a3 +; RV32ZBB-NEXT: snez a1, a3 +; RV32ZBB-NEXT: neg a2, a2 +; RV32ZBB-NEXT: sub a1, a2, a1 +; RV32ZBB-NEXT: ret +; +; RV64I-LABEL: expanded_neg_inv_abs64: +; RV64I: # %bb.0: +; RV64I-NEXT: neg a1, a0 +; RV64I-NEXT: blt a1, a0, .LBB6_2 +; RV64I-NEXT: # %bb.1: +; RV64I-NEXT: mv a1, a0 +; RV64I-NEXT: .LBB6_2: +; RV64I-NEXT: neg a0, a1 +; RV64I-NEXT: ret +; +; RV64ZBB-LABEL: expanded_neg_inv_abs64: +; RV64ZBB: # %bb.0: +; RV64ZBB-NEXT: neg a1, a0 +; RV64ZBB-NEXT: max a0, a0, a1 +; RV64ZBB-NEXT: ret + %n = sub i64 0, %x + %t = call i64 @llvm.smin.i64(i64 %n, i64 %x) + %r = sub i64 0, %t + ret i64 %r +} + +define i64 @expanded_neg_inv_abs64_unsigned(i64 %x) { +; RV32I-LABEL: expanded_neg_inv_abs64_unsigned: +; RV32I: # %bb.0: +; RV32I-NEXT: snez a2, a0 +; RV32I-NEXT: neg a3, a1 +; RV32I-NEXT: sub a2, a3, a2 +; RV32I-NEXT: neg a3, a0 +; RV32I-NEXT: beq a2, a1, .LBB7_2 +; RV32I-NEXT: # %bb.1: +; RV32I-NEXT: sltu a4, a2, a1 +; RV32I-NEXT: beqz a4, .LBB7_3 +; RV32I-NEXT: j .LBB7_4 +; RV32I-NEXT: .LBB7_2: +; RV32I-NEXT: sltu a4, a3, a0 +; RV32I-NEXT: bnez a4, .LBB7_4 +; RV32I-NEXT: .LBB7_3: +; RV32I-NEXT: mv a3, a0 +; RV32I-NEXT: mv a2, a1 +; RV32I-NEXT: .LBB7_4: +; RV32I-NEXT: neg a0, a3 +; RV32I-NEXT: snez a1, a3 +; RV32I-NEXT: neg a2, a2 +; RV32I-NEXT: sub a1, a2, a1 +; RV32I-NEXT: ret +; +; RV32ZBB-LABEL: expanded_neg_inv_abs64_unsigned: +; RV32ZBB: # %bb.0: +; RV32ZBB-NEXT: snez a2, a0 +; RV32ZBB-NEXT: neg a3, a1 +; RV32ZBB-NEXT: sub a2, a3, a2 +; RV32ZBB-NEXT: neg a3, a0 +; RV32ZBB-NEXT: beq a2, a1, .LBB7_2 +; RV32ZBB-NEXT: # %bb.1: +; RV32ZBB-NEXT: sltu a4, a2, a1 +; RV32ZBB-NEXT: beqz a4, .LBB7_3 +; RV32ZBB-NEXT: j .LBB7_4 +; RV32ZBB-NEXT: .LBB7_2: +; RV32ZBB-NEXT: sltu a4, a3, a0 +; RV32ZBB-NEXT: bnez a4, .LBB7_4 +; RV32ZBB-NEXT: .LBB7_3: +; RV32ZBB-NEXT: mv a3, a0 +; 
RV32ZBB-NEXT: mv a2, a1 +; RV32ZBB-NEXT: .LBB7_4: +; RV32ZBB-NEXT: neg a0, a3 +; RV32ZBB-NEXT: snez a1, a3 +; RV32ZBB-NEXT: neg a2, a2 +; RV32ZBB-NEXT: sub a1, a2, a1 +; RV32ZBB-NEXT: ret +; +; RV64I-LABEL: expanded_neg_inv_abs64_unsigned: +; RV64I: # %bb.0: +; RV64I-NEXT: neg a1, a0 +; RV64I-NEXT: bltu a1, a0, .LBB7_2 +; RV64I-NEXT: # %bb.1: +; RV64I-NEXT: mv a1, a0 +; RV64I-NEXT: .LBB7_2: +; RV64I-NEXT: neg a0, a1 +; RV64I-NEXT: ret +; +; RV64ZBB-LABEL: expanded_neg_inv_abs64_unsigned: +; RV64ZBB: # %bb.0: +; RV64ZBB-NEXT: neg a1, a0 +; RV64ZBB-NEXT: maxu a0, a0, a1 +; RV64ZBB-NEXT: ret + %n = sub i64 0, %x + %t = call i64 @llvm.umin.i64(i64 %n, i64 %x) + %r = sub i64 0, %t + ret i64 %r +}