From c05fd18f226ea84f9cdc35be52db4e32189e49dc Mon Sep 17 00:00:00 2001
From: Min Hsu
Date: Mon, 23 Dec 2024 11:46:16 -0800
Subject: [PATCH 1/5] [GISel] Combine `(neg (max x, (neg x)))` into `(min x, (neg x))`

This is the GISel version of #120666.

---
 .../llvm/CodeGen/GlobalISel/CombinerHelper.h  |   3 +
 llvm/include/llvm/CodeGen/GlobalISel/Utils.h  |   4 +
 .../include/llvm/Target/GlobalISel/Combine.td |   8 +-
 .../lib/CodeGen/GlobalISel/CombinerHelper.cpp |  36 ++
 llvm/lib/CodeGen/GlobalISel/Utils.cpp         |  15 +
 llvm/lib/Target/RISCV/RISCVCombine.td         |   2 +-
 .../RISCV/GlobalISel/combine-neg-abs.ll       | 457 ++++++++++++++++++
 7 files changed, 523 insertions(+), 2 deletions(-)
 create mode 100644 llvm/test/CodeGen/RISCV/GlobalISel/combine-neg-abs.ll

diff --git a/llvm/include/llvm/CodeGen/GlobalISel/CombinerHelper.h b/llvm/include/llvm/CodeGen/GlobalISel/CombinerHelper.h
index 871456d2a55b5..94e36e412b0cf 100644
--- a/llvm/include/llvm/CodeGen/GlobalISel/CombinerHelper.h
+++ b/llvm/include/llvm/CodeGen/GlobalISel/CombinerHelper.h
@@ -864,6 +864,9 @@ class CombinerHelper {
   /// Combine select to integer min/max.
   bool matchSelectIMinMax(const MachineOperand &MO, BuildFnTy &MatchInfo) const;
 
+  /// Transform (neg (min/max x, (neg x))) into (max/min x, (neg x)).
+  bool matchSimplifyNegMinMax(MachineInstr &MI, BuildFnTy &MatchInfo) const;
+
   /// Combine selects.
   bool matchSelect(MachineInstr &MI, BuildFnTy &MatchInfo) const;
 
diff --git a/llvm/include/llvm/CodeGen/GlobalISel/Utils.h b/llvm/include/llvm/CodeGen/GlobalISel/Utils.h
index cb5a4c14b364c..c4ba732879f40 100644
--- a/llvm/include/llvm/CodeGen/GlobalISel/Utils.h
+++ b/llvm/include/llvm/CodeGen/GlobalISel/Utils.h
@@ -171,6 +171,10 @@ void reportGISelWarning(MachineFunction &MF, const TargetPassConfig &TPC,
                         MachineOptimizationRemarkEmitter &MORE,
                         MachineOptimizationRemarkMissed &R);
 
+/// Returns the inverse opcode of \p MinMaxOpc , which is a generic min/max
+/// opcode like G_SMIN.
+unsigned getInverseGMinMaxOpcode(unsigned MinMaxOpc);
+
 /// If \p VReg is defined by a G_CONSTANT, return the corresponding value.
 std::optional<APInt> getIConstantVRegVal(Register VReg,
                                          const MachineRegisterInfo &MRI);

diff --git a/llvm/include/llvm/Target/GlobalISel/Combine.td b/llvm/include/llvm/Target/GlobalISel/Combine.td
index 013c3a6ed83d8..8641eabbdd84c 100644
--- a/llvm/include/llvm/Target/GlobalISel/Combine.td
+++ b/llvm/include/llvm/Target/GlobalISel/Combine.td
@@ -1372,6 +1372,12 @@ def select_to_iminmax: GICombineRule<
          [{ return Helper.matchSelectIMinMax(${root}, ${info}); }]),
   (apply [{ Helper.applyBuildFnMO(${root}, ${info}); }])>;
 
+def simplify_neg_minmax : GICombineRule<
+  (defs root:$root, build_fn_matchinfo:$matchinfo),
+  (match (wip_match_opcode G_SUB):$root,
+         [{ return Helper.matchSimplifyNegMinMax(*${root}, ${matchinfo}); }]),
+  (apply [{ Helper.applyBuildFn(*${root}, ${matchinfo}); }])>;
+
 def match_selects : GICombineRule<
   (defs root:$root, build_fn_matchinfo:$matchinfo),
   (match (wip_match_opcode G_SELECT):$root,
@@ -2008,7 +2014,7 @@ def all_combines : GICombineGroup<[integer_reassoc_combines, trivial_combines,
     and_or_disjoint_mask, fma_combines, fold_binop_into_select,
     sub_add_reg, select_to_minmax, fsub_to_fneg, commute_constant_to_rhs,
     match_ands, match_ors,
-    combine_concat_vector,
+    simplify_neg_minmax, combine_concat_vector,
     sext_trunc, zext_trunc, prefer_sign_combines, shuffle_combines,
     combine_use_vector_truncate, merge_combines, overflow_combines]>;
 
diff --git a/llvm/lib/CodeGen/GlobalISel/CombinerHelper.cpp b/llvm/lib/CodeGen/GlobalISel/CombinerHelper.cpp
index c061c01d3c1b1..71f9fab9419de 100644
--- a/llvm/lib/CodeGen/GlobalISel/CombinerHelper.cpp
+++ b/llvm/lib/CodeGen/GlobalISel/CombinerHelper.cpp
@@ -7062,6 +7062,42 @@ bool CombinerHelper::matchSelectIMinMax(const MachineOperand &MO,
   }
 }
 
+// (neg (min/max x, (neg x))) --> (max/min x, (neg x))
+bool CombinerHelper::matchSimplifyNegMinMax(MachineInstr &MI,
+                                            BuildFnTy &MatchInfo) const {
+  assert(MI.getOpcode() == TargetOpcode::G_SUB);
+  Register DestReg = MI.getOperand(0).getReg();
+  LLT DestTy = MRI.getType(DestReg);
+  if (!isLegal({TargetOpcode::G_SUB, {DestTy}}))
+    return false;
+
+  // GISel doesn't have m_Deferred at this moment, so we have to
+  // match this pattern in two phases.
+  Register X, Y;
+  Register Sub0;
+  if (mi_match(DestReg, MRI,
+               m_Neg(m_OneUse(m_any_of(
+                   m_GSMin(m_Reg(X), m_Reg(Y)), m_GSMax(m_Reg(X), m_Reg(Y)),
+                   m_CommutativeBinOp(TargetOpcode::G_UMIN, m_Reg(X), m_Reg(Y)),
+                   m_CommutativeBinOp(TargetOpcode::G_UMAX, m_Reg(X),
+                                      m_Reg(Y)))))) &&
+      (mi_match(Y, MRI, m_all_of(m_Neg(m_SpecificReg(X)), m_Reg(Sub0))) ||
+       mi_match(X, MRI, m_all_of(m_Neg(m_SpecificReg(Y)), m_Reg(Sub0))))) {
+    MachineInstr *MinMaxMI = MRI.getVRegDef(MI.getOperand(2).getReg());
+    MachineInstr *Sub0MI = MRI.getVRegDef(Sub0);
+    X = Sub0MI->getOperand(2).getReg();
+    unsigned NewOpc = getInverseGMinMaxOpcode(MinMaxMI->getOpcode());
+    if (isLegal({NewOpc, {DestTy}})) {
+      MatchInfo = [=](MachineIRBuilder &B) {
+        B.buildInstr(NewOpc, {DestReg}, {X, Sub0});
+      };
+      return true;
+    }
+  }
+
+  return false;
+}
+
 bool CombinerHelper::matchSelect(MachineInstr &MI, BuildFnTy &MatchInfo) const {
   GSelect *Select = cast<GSelect>(&MI);
 
diff --git a/llvm/lib/CodeGen/GlobalISel/Utils.cpp b/llvm/lib/CodeGen/GlobalISel/Utils.cpp
index 79382933a1f42..625d556e3ff5e 100644
--- a/llvm/lib/CodeGen/GlobalISel/Utils.cpp
+++ b/llvm/lib/CodeGen/GlobalISel/Utils.cpp
@@ -276,6 +276,21 @@ void llvm::reportGISelFailure(MachineFunction &MF, const TargetPassConfig &TPC,
   reportGISelFailure(MF, TPC, MORE, R);
 }
 
+unsigned llvm::getInverseGMinMaxOpcode(unsigned MinMaxOpc) {
+  switch (MinMaxOpc) {
+  case TargetOpcode::G_SMIN:
+    return TargetOpcode::G_SMAX;
+  case TargetOpcode::G_SMAX:
+    return TargetOpcode::G_SMIN;
+  case TargetOpcode::G_UMIN:
+    return TargetOpcode::G_UMAX;
+  case TargetOpcode::G_UMAX:
+    return TargetOpcode::G_UMIN;
+  default:
+    llvm_unreachable("unrecognized opcode");
+  }
+}
+
 std::optional<APInt> llvm::getIConstantVRegVal(Register VReg,
                                                const MachineRegisterInfo &MRI) {
   std::optional<ValueAndVReg> ValAndVReg = getIConstantVRegValWithLookThrough(
diff --git a/llvm/lib/Target/RISCV/RISCVCombine.td b/llvm/lib/Target/RISCV/RISCVCombine.td
index 030613a7d8904..995dd0c5d82eb 100644
--- a/llvm/lib/Target/RISCV/RISCVCombine.td
+++ b/llvm/lib/Target/RISCV/RISCVCombine.td
@@ -25,5 +25,5 @@ def RISCVPostLegalizerCombiner
     : GICombiner<"RISCVPostLegalizerCombinerImpl",
                  [sub_to_add, combines_for_extload, redundant_and,
                   identity_combines, shift_immed_chain,
-                  commute_constant_to_rhs]> {
+                  commute_constant_to_rhs, simplify_neg_minmax]> {
 }
diff --git a/llvm/test/CodeGen/RISCV/GlobalISel/combine-neg-abs.ll b/llvm/test/CodeGen/RISCV/GlobalISel/combine-neg-abs.ll
new file mode 100644
index 0000000000000..6c848ecf0fffd
--- /dev/null
+++ b/llvm/test/CodeGen/RISCV/GlobalISel/combine-neg-abs.ll
@@ -0,0 +1,457 @@
+; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py UTC_ARGS: --version 5
+; RUN: llc -mtriple=riscv32 -global-isel -verify-machineinstrs < %s \
+; RUN:   | FileCheck %s --check-prefixes=RV32I
+; RUN: llc -mtriple=riscv32 -global-isel -mattr=+zbb -verify-machineinstrs < %s \
+; RUN:   | FileCheck %s --check-prefixes=RV32ZBB
+; RUN: llc -mtriple=riscv64 -global-isel -verify-machineinstrs < %s \
+; RUN:   | FileCheck %s --check-prefixes=RV64I
+; RUN: llc -mtriple=riscv64 -global-isel -mattr=+zbb -verify-machineinstrs < %s \
+; RUN:   | FileCheck %s --check-prefixes=RV64ZBB
+
+define i32 @expanded_neg_abs32(i32 %x) {
+; RV32I-LABEL: expanded_neg_abs32:
+; RV32I:       # %bb.0:
+; RV32I-NEXT:    neg a1, a0
+; RV32I-NEXT:    blt a0, a1, .LBB0_2
+; RV32I-NEXT:  # %bb.1:
+; RV32I-NEXT:    mv a1, a0
+; RV32I-NEXT:  .LBB0_2:
+; RV32I-NEXT:    neg a0, a1
+; RV32I-NEXT:    ret
+;
+; RV32ZBB-LABEL: expanded_neg_abs32:
+; RV32ZBB:       # %bb.0:
+; RV32ZBB-NEXT:    neg
a1, a0 +; RV32ZBB-NEXT: min a0, a0, a1 +; RV32ZBB-NEXT: ret +; +; RV64I-LABEL: expanded_neg_abs32: +; RV64I: # %bb.0: +; RV64I-NEXT: neg a1, a0 +; RV64I-NEXT: sext.w a2, a1 +; RV64I-NEXT: sext.w a3, a0 +; RV64I-NEXT: blt a3, a2, .LBB0_2 +; RV64I-NEXT: # %bb.1: +; RV64I-NEXT: mv a1, a0 +; RV64I-NEXT: .LBB0_2: +; RV64I-NEXT: neg a0, a1 +; RV64I-NEXT: ret +; +; RV64ZBB-LABEL: expanded_neg_abs32: +; RV64ZBB: # %bb.0: +; RV64ZBB-NEXT: negw a1, a0 +; RV64ZBB-NEXT: sext.w a0, a0 +; RV64ZBB-NEXT: max a0, a1, a0 +; RV64ZBB-NEXT: neg a0, a0 +; RV64ZBB-NEXT: ret + %n = sub i32 0, %x + %t = call i32 @llvm.smax.i32(i32 %n, i32 %x) + %r = sub i32 0, %t + ret i32 %r +} + +define i32 @expanded_neg_abs32_unsigned(i32 %x) { +; RV32I-LABEL: expanded_neg_abs32_unsigned: +; RV32I: # %bb.0: +; RV32I-NEXT: neg a1, a0 +; RV32I-NEXT: bltu a0, a1, .LBB1_2 +; RV32I-NEXT: # %bb.1: +; RV32I-NEXT: mv a1, a0 +; RV32I-NEXT: .LBB1_2: +; RV32I-NEXT: neg a0, a1 +; RV32I-NEXT: ret +; +; RV32ZBB-LABEL: expanded_neg_abs32_unsigned: +; RV32ZBB: # %bb.0: +; RV32ZBB-NEXT: neg a1, a0 +; RV32ZBB-NEXT: minu a0, a0, a1 +; RV32ZBB-NEXT: ret +; +; RV64I-LABEL: expanded_neg_abs32_unsigned: +; RV64I: # %bb.0: +; RV64I-NEXT: neg a1, a0 +; RV64I-NEXT: sext.w a2, a1 +; RV64I-NEXT: sext.w a3, a0 +; RV64I-NEXT: bltu a3, a2, .LBB1_2 +; RV64I-NEXT: # %bb.1: +; RV64I-NEXT: mv a1, a0 +; RV64I-NEXT: .LBB1_2: +; RV64I-NEXT: neg a0, a1 +; RV64I-NEXT: ret +; +; RV64ZBB-LABEL: expanded_neg_abs32_unsigned: +; RV64ZBB: # %bb.0: +; RV64ZBB-NEXT: negw a1, a0 +; RV64ZBB-NEXT: sext.w a0, a0 +; RV64ZBB-NEXT: maxu a0, a1, a0 +; RV64ZBB-NEXT: neg a0, a0 +; RV64ZBB-NEXT: ret + %n = sub i32 0, %x + %t = call i32 @llvm.umax.i32(i32 %n, i32 %x) + %r = sub i32 0, %t + ret i32 %r +} + +define i64 @expanded_neg_abs64(i64 %x) { +; RV32I-LABEL: expanded_neg_abs64: +; RV32I: # %bb.0: +; RV32I-NEXT: snez a2, a0 +; RV32I-NEXT: neg a3, a1 +; RV32I-NEXT: sub a2, a3, a2 +; RV32I-NEXT: neg a3, a0 +; RV32I-NEXT: beq a2, a1, .LBB2_2 +; RV32I-NEXT: # %bb.1: +; RV32I-NEXT: slt a4, a1, a2 +; RV32I-NEXT: beqz a4, .LBB2_3 +; RV32I-NEXT: j .LBB2_4 +; RV32I-NEXT: .LBB2_2: +; RV32I-NEXT: sltu a4, a0, a3 +; RV32I-NEXT: bnez a4, .LBB2_4 +; RV32I-NEXT: .LBB2_3: +; RV32I-NEXT: mv a3, a0 +; RV32I-NEXT: mv a2, a1 +; RV32I-NEXT: .LBB2_4: +; RV32I-NEXT: neg a0, a3 +; RV32I-NEXT: snez a1, a3 +; RV32I-NEXT: neg a2, a2 +; RV32I-NEXT: sub a1, a2, a1 +; RV32I-NEXT: ret +; +; RV32ZBB-LABEL: expanded_neg_abs64: +; RV32ZBB: # %bb.0: +; RV32ZBB-NEXT: snez a2, a0 +; RV32ZBB-NEXT: neg a3, a1 +; RV32ZBB-NEXT: sub a2, a3, a2 +; RV32ZBB-NEXT: neg a3, a0 +; RV32ZBB-NEXT: beq a2, a1, .LBB2_2 +; RV32ZBB-NEXT: # %bb.1: +; RV32ZBB-NEXT: slt a4, a1, a2 +; RV32ZBB-NEXT: beqz a4, .LBB2_3 +; RV32ZBB-NEXT: j .LBB2_4 +; RV32ZBB-NEXT: .LBB2_2: +; RV32ZBB-NEXT: sltu a4, a0, a3 +; RV32ZBB-NEXT: bnez a4, .LBB2_4 +; RV32ZBB-NEXT: .LBB2_3: +; RV32ZBB-NEXT: mv a3, a0 +; RV32ZBB-NEXT: mv a2, a1 +; RV32ZBB-NEXT: .LBB2_4: +; RV32ZBB-NEXT: neg a0, a3 +; RV32ZBB-NEXT: snez a1, a3 +; RV32ZBB-NEXT: neg a2, a2 +; RV32ZBB-NEXT: sub a1, a2, a1 +; RV32ZBB-NEXT: ret +; +; RV64I-LABEL: expanded_neg_abs64: +; RV64I: # %bb.0: +; RV64I-NEXT: neg a1, a0 +; RV64I-NEXT: blt a0, a1, .LBB2_2 +; RV64I-NEXT: # %bb.1: +; RV64I-NEXT: mv a1, a0 +; RV64I-NEXT: .LBB2_2: +; RV64I-NEXT: neg a0, a1 +; RV64I-NEXT: ret +; +; RV64ZBB-LABEL: expanded_neg_abs64: +; RV64ZBB: # %bb.0: +; RV64ZBB-NEXT: neg a1, a0 +; RV64ZBB-NEXT: min a0, a0, a1 +; RV64ZBB-NEXT: ret + %n = sub i64 0, %x + %t = call i64 @llvm.smax.i64(i64 %n, i64 %x) + %r = sub i64 0, %t + ret i64 
%r +} + +define i64 @expanded_neg_abs64_unsigned(i64 %x) { +; RV32I-LABEL: expanded_neg_abs64_unsigned: +; RV32I: # %bb.0: +; RV32I-NEXT: snez a2, a0 +; RV32I-NEXT: neg a3, a1 +; RV32I-NEXT: sub a2, a3, a2 +; RV32I-NEXT: neg a3, a0 +; RV32I-NEXT: beq a2, a1, .LBB3_2 +; RV32I-NEXT: # %bb.1: +; RV32I-NEXT: sltu a4, a1, a2 +; RV32I-NEXT: beqz a4, .LBB3_3 +; RV32I-NEXT: j .LBB3_4 +; RV32I-NEXT: .LBB3_2: +; RV32I-NEXT: sltu a4, a0, a3 +; RV32I-NEXT: bnez a4, .LBB3_4 +; RV32I-NEXT: .LBB3_3: +; RV32I-NEXT: mv a3, a0 +; RV32I-NEXT: mv a2, a1 +; RV32I-NEXT: .LBB3_4: +; RV32I-NEXT: neg a0, a3 +; RV32I-NEXT: snez a1, a3 +; RV32I-NEXT: neg a2, a2 +; RV32I-NEXT: sub a1, a2, a1 +; RV32I-NEXT: ret +; +; RV32ZBB-LABEL: expanded_neg_abs64_unsigned: +; RV32ZBB: # %bb.0: +; RV32ZBB-NEXT: snez a2, a0 +; RV32ZBB-NEXT: neg a3, a1 +; RV32ZBB-NEXT: sub a2, a3, a2 +; RV32ZBB-NEXT: neg a3, a0 +; RV32ZBB-NEXT: beq a2, a1, .LBB3_2 +; RV32ZBB-NEXT: # %bb.1: +; RV32ZBB-NEXT: sltu a4, a1, a2 +; RV32ZBB-NEXT: beqz a4, .LBB3_3 +; RV32ZBB-NEXT: j .LBB3_4 +; RV32ZBB-NEXT: .LBB3_2: +; RV32ZBB-NEXT: sltu a4, a0, a3 +; RV32ZBB-NEXT: bnez a4, .LBB3_4 +; RV32ZBB-NEXT: .LBB3_3: +; RV32ZBB-NEXT: mv a3, a0 +; RV32ZBB-NEXT: mv a2, a1 +; RV32ZBB-NEXT: .LBB3_4: +; RV32ZBB-NEXT: neg a0, a3 +; RV32ZBB-NEXT: snez a1, a3 +; RV32ZBB-NEXT: neg a2, a2 +; RV32ZBB-NEXT: sub a1, a2, a1 +; RV32ZBB-NEXT: ret +; +; RV64I-LABEL: expanded_neg_abs64_unsigned: +; RV64I: # %bb.0: +; RV64I-NEXT: neg a1, a0 +; RV64I-NEXT: bltu a0, a1, .LBB3_2 +; RV64I-NEXT: # %bb.1: +; RV64I-NEXT: mv a1, a0 +; RV64I-NEXT: .LBB3_2: +; RV64I-NEXT: neg a0, a1 +; RV64I-NEXT: ret +; +; RV64ZBB-LABEL: expanded_neg_abs64_unsigned: +; RV64ZBB: # %bb.0: +; RV64ZBB-NEXT: neg a1, a0 +; RV64ZBB-NEXT: minu a0, a0, a1 +; RV64ZBB-NEXT: ret + %n = sub i64 0, %x + %t = call i64 @llvm.umax.i64(i64 %n, i64 %x) + %r = sub i64 0, %t + ret i64 %r +} + +define i32 @expanded_neg_inv_abs32(i32 %x) { +; RV32I-LABEL: expanded_neg_inv_abs32: +; RV32I: # %bb.0: +; RV32I-NEXT: neg a1, a0 +; RV32I-NEXT: blt a1, a0, .LBB4_2 +; RV32I-NEXT: # %bb.1: +; RV32I-NEXT: mv a1, a0 +; RV32I-NEXT: .LBB4_2: +; RV32I-NEXT: neg a0, a1 +; RV32I-NEXT: ret +; +; RV32ZBB-LABEL: expanded_neg_inv_abs32: +; RV32ZBB: # %bb.0: +; RV32ZBB-NEXT: neg a1, a0 +; RV32ZBB-NEXT: max a0, a0, a1 +; RV32ZBB-NEXT: ret +; +; RV64I-LABEL: expanded_neg_inv_abs32: +; RV64I: # %bb.0: +; RV64I-NEXT: neg a1, a0 +; RV64I-NEXT: sext.w a2, a1 +; RV64I-NEXT: sext.w a3, a0 +; RV64I-NEXT: blt a2, a3, .LBB4_2 +; RV64I-NEXT: # %bb.1: +; RV64I-NEXT: mv a1, a0 +; RV64I-NEXT: .LBB4_2: +; RV64I-NEXT: neg a0, a1 +; RV64I-NEXT: ret +; +; RV64ZBB-LABEL: expanded_neg_inv_abs32: +; RV64ZBB: # %bb.0: +; RV64ZBB-NEXT: negw a1, a0 +; RV64ZBB-NEXT: sext.w a0, a0 +; RV64ZBB-NEXT: min a0, a1, a0 +; RV64ZBB-NEXT: neg a0, a0 +; RV64ZBB-NEXT: ret + %n = sub i32 0, %x + %t = call i32 @llvm.smin.i32(i32 %n, i32 %x) + %r = sub i32 0, %t + ret i32 %r +} + +define i32 @expanded_neg_inv_abs32_unsigned(i32 %x) { +; RV32I-LABEL: expanded_neg_inv_abs32_unsigned: +; RV32I: # %bb.0: +; RV32I-NEXT: neg a1, a0 +; RV32I-NEXT: bltu a1, a0, .LBB5_2 +; RV32I-NEXT: # %bb.1: +; RV32I-NEXT: mv a1, a0 +; RV32I-NEXT: .LBB5_2: +; RV32I-NEXT: neg a0, a1 +; RV32I-NEXT: ret +; +; RV32ZBB-LABEL: expanded_neg_inv_abs32_unsigned: +; RV32ZBB: # %bb.0: +; RV32ZBB-NEXT: neg a1, a0 +; RV32ZBB-NEXT: maxu a0, a0, a1 +; RV32ZBB-NEXT: ret +; +; RV64I-LABEL: expanded_neg_inv_abs32_unsigned: +; RV64I: # %bb.0: +; RV64I-NEXT: neg a1, a0 +; RV64I-NEXT: sext.w a2, a1 +; RV64I-NEXT: sext.w a3, a0 +; RV64I-NEXT: 
bltu a2, a3, .LBB5_2 +; RV64I-NEXT: # %bb.1: +; RV64I-NEXT: mv a1, a0 +; RV64I-NEXT: .LBB5_2: +; RV64I-NEXT: neg a0, a1 +; RV64I-NEXT: ret +; +; RV64ZBB-LABEL: expanded_neg_inv_abs32_unsigned: +; RV64ZBB: # %bb.0: +; RV64ZBB-NEXT: negw a1, a0 +; RV64ZBB-NEXT: sext.w a0, a0 +; RV64ZBB-NEXT: minu a0, a1, a0 +; RV64ZBB-NEXT: neg a0, a0 +; RV64ZBB-NEXT: ret + %n = sub i32 0, %x + %t = call i32 @llvm.umin.i32(i32 %n, i32 %x) + %r = sub i32 0, %t + ret i32 %r +} + +define i64 @expanded_neg_inv_abs64(i64 %x) { +; RV32I-LABEL: expanded_neg_inv_abs64: +; RV32I: # %bb.0: +; RV32I-NEXT: snez a2, a0 +; RV32I-NEXT: neg a3, a1 +; RV32I-NEXT: sub a2, a3, a2 +; RV32I-NEXT: neg a3, a0 +; RV32I-NEXT: beq a2, a1, .LBB6_2 +; RV32I-NEXT: # %bb.1: +; RV32I-NEXT: slt a4, a2, a1 +; RV32I-NEXT: beqz a4, .LBB6_3 +; RV32I-NEXT: j .LBB6_4 +; RV32I-NEXT: .LBB6_2: +; RV32I-NEXT: sltu a4, a3, a0 +; RV32I-NEXT: bnez a4, .LBB6_4 +; RV32I-NEXT: .LBB6_3: +; RV32I-NEXT: mv a3, a0 +; RV32I-NEXT: mv a2, a1 +; RV32I-NEXT: .LBB6_4: +; RV32I-NEXT: neg a0, a3 +; RV32I-NEXT: snez a1, a3 +; RV32I-NEXT: neg a2, a2 +; RV32I-NEXT: sub a1, a2, a1 +; RV32I-NEXT: ret +; +; RV32ZBB-LABEL: expanded_neg_inv_abs64: +; RV32ZBB: # %bb.0: +; RV32ZBB-NEXT: snez a2, a0 +; RV32ZBB-NEXT: neg a3, a1 +; RV32ZBB-NEXT: sub a2, a3, a2 +; RV32ZBB-NEXT: neg a3, a0 +; RV32ZBB-NEXT: beq a2, a1, .LBB6_2 +; RV32ZBB-NEXT: # %bb.1: +; RV32ZBB-NEXT: slt a4, a2, a1 +; RV32ZBB-NEXT: beqz a4, .LBB6_3 +; RV32ZBB-NEXT: j .LBB6_4 +; RV32ZBB-NEXT: .LBB6_2: +; RV32ZBB-NEXT: sltu a4, a3, a0 +; RV32ZBB-NEXT: bnez a4, .LBB6_4 +; RV32ZBB-NEXT: .LBB6_3: +; RV32ZBB-NEXT: mv a3, a0 +; RV32ZBB-NEXT: mv a2, a1 +; RV32ZBB-NEXT: .LBB6_4: +; RV32ZBB-NEXT: neg a0, a3 +; RV32ZBB-NEXT: snez a1, a3 +; RV32ZBB-NEXT: neg a2, a2 +; RV32ZBB-NEXT: sub a1, a2, a1 +; RV32ZBB-NEXT: ret +; +; RV64I-LABEL: expanded_neg_inv_abs64: +; RV64I: # %bb.0: +; RV64I-NEXT: neg a1, a0 +; RV64I-NEXT: blt a1, a0, .LBB6_2 +; RV64I-NEXT: # %bb.1: +; RV64I-NEXT: mv a1, a0 +; RV64I-NEXT: .LBB6_2: +; RV64I-NEXT: neg a0, a1 +; RV64I-NEXT: ret +; +; RV64ZBB-LABEL: expanded_neg_inv_abs64: +; RV64ZBB: # %bb.0: +; RV64ZBB-NEXT: neg a1, a0 +; RV64ZBB-NEXT: max a0, a0, a1 +; RV64ZBB-NEXT: ret + %n = sub i64 0, %x + %t = call i64 @llvm.smin.i64(i64 %n, i64 %x) + %r = sub i64 0, %t + ret i64 %r +} + +define i64 @expanded_neg_inv_abs64_unsigned(i64 %x) { +; RV32I-LABEL: expanded_neg_inv_abs64_unsigned: +; RV32I: # %bb.0: +; RV32I-NEXT: snez a2, a0 +; RV32I-NEXT: neg a3, a1 +; RV32I-NEXT: sub a2, a3, a2 +; RV32I-NEXT: neg a3, a0 +; RV32I-NEXT: beq a2, a1, .LBB7_2 +; RV32I-NEXT: # %bb.1: +; RV32I-NEXT: sltu a4, a2, a1 +; RV32I-NEXT: beqz a4, .LBB7_3 +; RV32I-NEXT: j .LBB7_4 +; RV32I-NEXT: .LBB7_2: +; RV32I-NEXT: sltu a4, a3, a0 +; RV32I-NEXT: bnez a4, .LBB7_4 +; RV32I-NEXT: .LBB7_3: +; RV32I-NEXT: mv a3, a0 +; RV32I-NEXT: mv a2, a1 +; RV32I-NEXT: .LBB7_4: +; RV32I-NEXT: neg a0, a3 +; RV32I-NEXT: snez a1, a3 +; RV32I-NEXT: neg a2, a2 +; RV32I-NEXT: sub a1, a2, a1 +; RV32I-NEXT: ret +; +; RV32ZBB-LABEL: expanded_neg_inv_abs64_unsigned: +; RV32ZBB: # %bb.0: +; RV32ZBB-NEXT: snez a2, a0 +; RV32ZBB-NEXT: neg a3, a1 +; RV32ZBB-NEXT: sub a2, a3, a2 +; RV32ZBB-NEXT: neg a3, a0 +; RV32ZBB-NEXT: beq a2, a1, .LBB7_2 +; RV32ZBB-NEXT: # %bb.1: +; RV32ZBB-NEXT: sltu a4, a2, a1 +; RV32ZBB-NEXT: beqz a4, .LBB7_3 +; RV32ZBB-NEXT: j .LBB7_4 +; RV32ZBB-NEXT: .LBB7_2: +; RV32ZBB-NEXT: sltu a4, a3, a0 +; RV32ZBB-NEXT: bnez a4, .LBB7_4 +; RV32ZBB-NEXT: .LBB7_3: +; RV32ZBB-NEXT: mv a3, a0 +; RV32ZBB-NEXT: mv a2, a1 +; RV32ZBB-NEXT: .LBB7_4: +; 
RV32ZBB-NEXT:    neg a0, a3
+; RV32ZBB-NEXT:    snez a1, a3
+; RV32ZBB-NEXT:    neg a2, a2
+; RV32ZBB-NEXT:    sub a1, a2, a1
+; RV32ZBB-NEXT:    ret
+;
+; RV64I-LABEL: expanded_neg_inv_abs64_unsigned:
+; RV64I:       # %bb.0:
+; RV64I-NEXT:    neg a1, a0
+; RV64I-NEXT:    bltu a1, a0, .LBB7_2
+; RV64I-NEXT:  # %bb.1:
+; RV64I-NEXT:    mv a1, a0
+; RV64I-NEXT:  .LBB7_2:
+; RV64I-NEXT:    neg a0, a1
+; RV64I-NEXT:    ret
+;
+; RV64ZBB-LABEL: expanded_neg_inv_abs64_unsigned:
+; RV64ZBB:       # %bb.0:
+; RV64ZBB-NEXT:    neg a1, a0
+; RV64ZBB-NEXT:    maxu a0, a0, a1
+; RV64ZBB-NEXT:    ret
+  %n = sub i64 0, %x
+  %t = call i64 @llvm.umin.i64(i64 %n, i64 %x)
+  %r = sub i64 0, %t
+  ret i64 %r
+}

From ae586ecf1a738b529426b491cc34d15f9994e06b Mon Sep 17 00:00:00 2001
From: Min Hsu
Date: Mon, 23 Dec 2024 12:56:22 -0800
Subject: [PATCH 2/5] Use m_BinOp instead of m_CommutativeBinOp

Because of the way we match it, it doesn't matter whether it's
commutative or not.

---
 llvm/lib/CodeGen/GlobalISel/CombinerHelper.cpp | 5 ++---
 1 file changed, 2 insertions(+), 3 deletions(-)

diff --git a/llvm/lib/CodeGen/GlobalISel/CombinerHelper.cpp b/llvm/lib/CodeGen/GlobalISel/CombinerHelper.cpp
index 71f9fab9419de..3ab9ab23a8eff 100644
--- a/llvm/lib/CodeGen/GlobalISel/CombinerHelper.cpp
+++ b/llvm/lib/CodeGen/GlobalISel/CombinerHelper.cpp
@@ -7078,9 +7078,8 @@ bool CombinerHelper::matchSimplifyNegMinMax(MachineInstr &MI,
   if (mi_match(DestReg, MRI,
                m_Neg(m_OneUse(m_any_of(
                    m_GSMin(m_Reg(X), m_Reg(Y)), m_GSMax(m_Reg(X), m_Reg(Y)),
-                   m_CommutativeBinOp(TargetOpcode::G_UMIN, m_Reg(X), m_Reg(Y)),
-                   m_CommutativeBinOp(TargetOpcode::G_UMAX, m_Reg(X),
-                                      m_Reg(Y)))))) &&
+                   m_BinOp(TargetOpcode::G_UMIN, m_Reg(X), m_Reg(Y)),
+                   m_BinOp(TargetOpcode::G_UMAX, m_Reg(X), m_Reg(Y)))))) &&
       (mi_match(Y, MRI, m_all_of(m_Neg(m_SpecificReg(X)), m_Reg(Sub0))) ||
        mi_match(X, MRI, m_all_of(m_Neg(m_SpecificReg(Y)), m_Reg(Sub0))))) {
     MachineInstr *MinMaxMI = MRI.getVRegDef(MI.getOperand(2).getReg());

From 1bcb38120a5411879a5ca8540825b7e8ad93af3b Mon Sep 17 00:00:00 2001
From: Min Hsu
Date: Tue, 24 Dec 2024 11:44:44 -0800
Subject: [PATCH 3/5] fixup! Use m_BinOp instead of m_CommutativeBinOp

---
 llvm/include/llvm/CodeGen/GlobalISel/Utils.h | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/llvm/include/llvm/CodeGen/GlobalISel/Utils.h b/llvm/include/llvm/CodeGen/GlobalISel/Utils.h
index c4ba732879f40..a35ecae5d18bf 100644
--- a/llvm/include/llvm/CodeGen/GlobalISel/Utils.h
+++ b/llvm/include/llvm/CodeGen/GlobalISel/Utils.h
@@ -171,7 +171,7 @@ void reportGISelWarning(MachineFunction &MF, const TargetPassConfig &TPC,
                         MachineOptimizationRemarkEmitter &MORE,
                         MachineOptimizationRemarkMissed &R);
 
-/// Returns the inverse opcode of \p MinMaxOpc , which is a generic min/max
+/// Returns the inverse opcode of \p MinMaxOpc, which is a generic min/max
 /// opcode like G_SMIN.
 unsigned getInverseGMinMaxOpcode(unsigned MinMaxOpc);
 
From e1f5da5aae9db4b736084bd6ba0aea621365ab5c Mon Sep 17 00:00:00 2001
From: Min Hsu
Date: Thu, 2 Jan 2025 13:43:03 -0800
Subject: [PATCH 4/5] fixup!
Apply m_DeferredReg --- llvm/lib/CodeGen/GlobalISel/CombinerHelper.cpp | 17 ++++++----------- 1 file changed, 6 insertions(+), 11 deletions(-) diff --git a/llvm/lib/CodeGen/GlobalISel/CombinerHelper.cpp b/llvm/lib/CodeGen/GlobalISel/CombinerHelper.cpp index 3ab9ab23a8eff..78f9730fbbcd9 100644 --- a/llvm/lib/CodeGen/GlobalISel/CombinerHelper.cpp +++ b/llvm/lib/CodeGen/GlobalISel/CombinerHelper.cpp @@ -7071,20 +7071,15 @@ bool CombinerHelper::matchSimplifyNegMinMax(MachineInstr &MI, if (!isLegal({TargetOpcode::G_SUB, {DestTy}})) return false; - // GISel doesn't have m_Deferred at this moment, so we have to - // match this pattern in two phases. - Register X, Y; + Register X; Register Sub0; + auto NegPattern = m_all_of(m_Neg(m_DeferredReg(X)), m_Reg(Sub0)); if (mi_match(DestReg, MRI, - m_Neg(m_OneUse(m_any_of( - m_GSMin(m_Reg(X), m_Reg(Y)), m_GSMax(m_Reg(X), m_Reg(Y)), - m_BinOp(TargetOpcode::G_UMIN, m_Reg(X), m_Reg(Y)), - m_BinOp(TargetOpcode::G_UMAX, m_Reg(X), m_Reg(Y)))))) && - (mi_match(Y, MRI, m_all_of(m_Neg(m_SpecificReg(X)), m_Reg(Sub0))) || - mi_match(X, MRI, m_all_of(m_Neg(m_SpecificReg(Y)), m_Reg(Sub0))))) { + m_Neg(m_OneUse(m_any_of(m_GSMin(m_Reg(X), NegPattern), + m_GSMax(m_Reg(X), NegPattern), + m_GUMin(m_Reg(X), NegPattern), + m_GUMax(m_Reg(X), NegPattern)))))) { MachineInstr *MinMaxMI = MRI.getVRegDef(MI.getOperand(2).getReg()); - MachineInstr *Sub0MI = MRI.getVRegDef(Sub0); - X = Sub0MI->getOperand(2).getReg(); unsigned NewOpc = getInverseGMinMaxOpcode(MinMaxMI->getOpcode()); if (isLegal({NewOpc, {DestTy}})) { MatchInfo = [=](MachineIRBuilder &B) { From 43470342c60d212051d577e23865c6612755c6f5 Mon Sep 17 00:00:00 2001 From: Min Hsu Date: Thu, 2 Jan 2025 14:33:27 -0800 Subject: [PATCH 5/5] fixup! Address review comments --- llvm/lib/CodeGen/GlobalISel/CombinerHelper.cpp | 2 -- 1 file changed, 2 deletions(-) diff --git a/llvm/lib/CodeGen/GlobalISel/CombinerHelper.cpp b/llvm/lib/CodeGen/GlobalISel/CombinerHelper.cpp index 78f9730fbbcd9..4e3aaf5da7198 100644 --- a/llvm/lib/CodeGen/GlobalISel/CombinerHelper.cpp +++ b/llvm/lib/CodeGen/GlobalISel/CombinerHelper.cpp @@ -7068,8 +7068,6 @@ bool CombinerHelper::matchSimplifyNegMinMax(MachineInstr &MI, assert(MI.getOpcode() == TargetOpcode::G_SUB); Register DestReg = MI.getOperand(0).getReg(); LLT DestTy = MRI.getType(DestReg); - if (!isLegal({TargetOpcode::G_SUB, {DestTy}})) - return false; Register X; Register Sub0;
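
For reviewers who want to sanity-check the identity this combine relies on
outside of llc, here is a minimal standalone sketch (illustrative only, not
part of the patch series; the file name and the NegS/NegU helpers are made up
for this example). The key observation is that the operand set {x, neg x} is
closed under wrapping negation, so negating a min/max over that set always
yields the opposite min/max, including at the x == INT_MIN corner case where
the general rewrite neg(smax(a, b)) == smin(neg a, neg b) would break (e.g.
a == INT_MIN, b == 0). The program below checks all four signed/unsigned
variants exhaustively at 16 bits:

// neg-minmax-check.cpp - hypothetical standalone check, not part of the
// patch. Exhaustively verifies, over all 16-bit values with two's-complement
// wrapping, the identities matchSimplifyNegMinMax relies on:
//   neg(smax(x, neg x)) == smin(x, neg x), plus the smin/umax/umin variants.
#include <algorithm>
#include <cassert>
#include <cstdint>
#include <cstdio>

int main() {
  // Wrapping 16-bit negation, mirroring a G_SUB from a zero LHS.
  auto NegS = [](int16_t V) {
    return static_cast<int16_t>(0u - static_cast<uint16_t>(V));
  };
  auto NegU = [](uint16_t V) { return static_cast<uint16_t>(0u - V); };

  for (uint32_t Bits = 0; Bits <= 0xFFFF; ++Bits) {
    int16_t X = static_cast<int16_t>(Bits);
    uint16_t U = static_cast<uint16_t>(Bits);
    // Signed: holds even at X == INT16_MIN, where NegS(X) == X and both
    // sides collapse to INT16_MIN.
    assert(NegS(std::max(X, NegS(X))) == std::min(X, NegS(X)));
    assert(NegS(std::min(X, NegS(X))) == std::max(X, NegS(X)));
    // Unsigned: for U != 0 the operands are U and 2^16 - U, which wrapping
    // negation simply swaps; for U == 0 both operands are zero.
    assert(NegU(std::max(U, NegU(U))) == std::min(U, NegU(U)));
    assert(NegU(std::min(U, NegU(U))) == std::max(U, NegU(U)));
  }
  std::puts("neg-min/max identities hold for all i16 values");
  return 0;
}

This is also why the combine gets away with only a one-use check on the
min/max plus legality of the inverse opcode: the new inverse min/max reuses
the already-present negation (Sub0) as an operand, so the original G_SUB and
the old min/max can both be dropped without creating extra instructions.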