diff --git a/llvm/lib/Target/RISCV/RISCVISelLowering.cpp b/llvm/lib/Target/RISCV/RISCVISelLowering.cpp index 27a4bbce1f5fc..f7d22328171d2 100644 --- a/llvm/lib/Target/RISCV/RISCVISelLowering.cpp +++ b/llvm/lib/Target/RISCV/RISCVISelLowering.cpp @@ -29,6 +29,7 @@ #include "llvm/CodeGen/MachineInstrBuilder.h" #include "llvm/CodeGen/MachineJumpTableInfo.h" #include "llvm/CodeGen/MachineRegisterInfo.h" +#include "llvm/CodeGen/SDPatternMatch.h" #include "llvm/CodeGen/SelectionDAGAddressAnalysis.h" #include "llvm/CodeGen/TargetLoweringObjectFileImpl.h" #include "llvm/CodeGen/ValueTypes.h" @@ -79,6 +80,12 @@ static cl::opt "use for creating a floating-point immediate value"), cl::init(2)); +static cl::opt + ReassocShlAddiAdd("reassoc-shl-addi-add", cl::Hidden, + cl::desc("Swap add and addi in cases where the add may " + "be combined with a shift"), + cl::init(true)); + RISCVTargetLowering::RISCVTargetLowering(const TargetMachine &TM, const RISCVSubtarget &STI) : TargetLowering(TM), Subtarget(STI) { @@ -14306,6 +14313,67 @@ static SDValue transformAddShlImm(SDNode *N, SelectionDAG &DAG, return DAG.getNode(ISD::SHL, DL, VT, SHADD, DAG.getConstant(Bits, DL, VT)); } +// Check if this SDValue is an add immediate that is fed by a shift of 1, 2, +// or 3. +static SDValue combineShlAddIAddImpl(SDNode *N, SDValue AddI, SDValue Other, + SelectionDAG &DAG) { + using namespace llvm::SDPatternMatch; + + // Looking for a reg-reg add and not an addi. + if (isa(N->getOperand(1))) + return SDValue(); + + // Based on testing it seems that performance degrades if the ADDI has + // more than 2 uses. + if (AddI->use_size() > 2) + return SDValue(); + + APInt AddVal; + SDValue SHLVal; + if (!sd_match(AddI, m_Add(m_Value(SHLVal), m_ConstInt(AddVal)))) + return SDValue(); + + APInt VShift; + if (!sd_match(SHLVal, m_BinOp(ISD::SHL, m_Value(), m_ConstInt(VShift)))) + return SDValue(); + + if (VShift.slt(1) || VShift.sgt(3)) + return SDValue(); + + SDLoc DL(N); + EVT VT = N->getValueType(0); + // The shift must be positive but the add can be signed. + uint64_t ShlConst = VShift.getZExtValue(); + int64_t AddConst = AddVal.getSExtValue(); + + SDValue SHADD = DAG.getNode(RISCVISD::SHL_ADD, DL, VT, SHLVal->getOperand(0), + DAG.getConstant(ShlConst, DL, VT), Other); + return DAG.getNode(ISD::ADD, DL, VT, SHADD, + DAG.getSignedConstant(AddConst, DL, VT)); +} + +// Optimize (add (add (shl x, c0), c1), y) -> +// (ADDI (SH*ADD y, x), c1), if c0 equals to [1|2|3]. +static SDValue combineShlAddIAdd(SDNode *N, SelectionDAG &DAG, + const RISCVSubtarget &Subtarget) { + // Perform this optimization only in the zba extension. + if (!ReassocShlAddiAdd || !Subtarget.hasStdExtZba()) + return SDValue(); + + // Skip for vector types and larger types. + EVT VT = N->getValueType(0); + if (VT != Subtarget.getXLenVT()) + return SDValue(); + + SDValue AddI = N->getOperand(0); + SDValue Other = N->getOperand(1); + if (SDValue V = combineShlAddIAddImpl(N, AddI, Other, DAG)) + return V; + if (SDValue V = combineShlAddIAddImpl(N, Other, AddI, DAG)) + return V; + return SDValue(); +} + // Combine a constant select operand into its use: // // (and (select cond, -1, c), x) @@ -14547,9 +14615,12 @@ static SDValue performADDCombine(SDNode *N, return V; if (SDValue V = transformAddImmMulImm(N, DAG, Subtarget)) return V; - if (!DCI.isBeforeLegalize() && !DCI.isCalledByLegalizer()) + if (!DCI.isBeforeLegalize() && !DCI.isCalledByLegalizer()) { if (SDValue V = transformAddShlImm(N, DAG, Subtarget)) return V; + if (SDValue V = combineShlAddIAdd(N, DAG, Subtarget)) + return V; + } if (SDValue V = combineBinOpToReduce(N, DAG, Subtarget)) return V; if (SDValue V = combineBinOpOfExtractToReduceTree(N, DAG, Subtarget)) diff --git a/llvm/test/CodeGen/RISCV/reassoc-shl-addi-add.ll b/llvm/test/CodeGen/RISCV/reassoc-shl-addi-add.ll new file mode 100644 index 0000000000000..88ab1c0c3eaef --- /dev/null +++ b/llvm/test/CodeGen/RISCV/reassoc-shl-addi-add.ll @@ -0,0 +1,189 @@ +; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py UTC_ARGS: --version 5 +; RUN: llc -mtriple=riscv32-unknown-elf -mattr=+zba %s -o - | FileCheck %s + +declare i32 @callee1(i32 noundef) +declare i32 @callee2(i32 noundef, i32 noundef) +declare i32 @callee(i32 noundef, i32 noundef, i32 noundef, i32 noundef) + +define void @t1(i32 noundef %a, i32 noundef %b, i32 noundef %c, i32 noundef %d) #0 { +; CHECK-LABEL: t1: +; CHECK: # %bb.0: # %entry +; CHECK-NEXT: sh2add a2, a0, a2 +; CHECK-NEXT: sh2add a1, a0, a1 +; CHECK-NEXT: addi a1, a1, 45 +; CHECK-NEXT: addi a2, a2, 45 +; CHECK-NEXT: sh2add a3, a0, a3 +; CHECK-NEXT: mv a0, a1 +; CHECK-NEXT: tail callee +entry: + %shl = shl i32 %a, 2 + %add = add nsw i32 %shl, 45 + %add1 = add nsw i32 %add, %b + %add3 = add nsw i32 %add, %c + %add5 = add nsw i32 %shl, %d + %call = tail call i32 @callee(i32 noundef %add1, i32 noundef %add1, i32 noundef %add3, i32 noundef %add5) + ret void +} + +define void @t2(i32 noundef %a, i32 noundef %b, i32 noundef %c) #0 { +; CHECK-LABEL: t2: +; CHECK: # %bb.0: # %entry +; CHECK-NEXT: slli a0, a0, 2 +; CHECK-NEXT: addi a5, a0, 42 +; CHECK-NEXT: add a4, a5, a1 +; CHECK-NEXT: add a3, a5, a2 +; CHECK-NEXT: mv a1, a5 +; CHECK-NEXT: mv a2, a4 +; CHECK-NEXT: tail callee +entry: + %shl = shl i32 %a, 2 + %add = add nsw i32 %shl, 42 + %add4 = add nsw i32 %add, %b + %add7 = add nsw i32 %add, %c + %call = tail call i32 @callee(i32 noundef %shl, i32 noundef %add, i32 noundef %add4, i32 noundef %add7) + ret void +} + +define void @t3(i32 noundef %a, i32 noundef %b, i32 noundef %c, i32 noundef %d, i32 noundef %e) #0 { +; CHECK-LABEL: t3: +; CHECK: # %bb.0: # %entry +; CHECK-NEXT: slli a0, a0, 2 +; CHECK-NEXT: addi a5, a0, 42 +; CHECK-NEXT: add a0, a5, a1 +; CHECK-NEXT: add a1, a5, a2 +; CHECK-NEXT: add a2, a5, a3 +; CHECK-NEXT: add a3, a5, a4 +; CHECK-NEXT: tail callee +entry: + %shl = shl i32 %a, 2 + %add = add nsw i32 %shl, 42 + %add1 = add nsw i32 %add, %b + %add2 = add nsw i32 %add, %c + %add3 = add nsw i32 %add, %d + %add4 = add nsw i32 %add, %e + %call = tail call i32 @callee(i32 noundef %add1, i32 noundef %add2, i32 noundef %add3, i32 noundef %add4) + ret void +} + +define void @t4(i32 noundef %a, i32 noundef %b) #0 { +; CHECK-LABEL: t4: +; CHECK: # %bb.0: # %entry +; CHECK-NEXT: sh2add a0, a0, a1 +; CHECK-NEXT: addi a0, a0, 42 +; CHECK-NEXT: tail callee1 +entry: + %shl = shl i32 %a, 2 + %add = add nsw i32 %shl, 42 + %add1 = add nsw i32 %add, %b + %call = tail call i32 @callee1(i32 noundef %add1) + ret void +} + +define void @t5(i32 noundef %a, i32 noundef %b, i32 noundef %c) #0 { +; CHECK-LABEL: t5: +; CHECK: # %bb.0: # %entry +; CHECK-NEXT: sh2add a2, a0, a2 +; CHECK-NEXT: sh2add a0, a0, a1 +; CHECK-NEXT: addi a0, a0, 42 +; CHECK-NEXT: addi a1, a2, 42 +; CHECK-NEXT: tail callee2 +entry: + %shl = shl i32 %a, 2 + %add = add nsw i32 %shl, 42 + %add1 = add nsw i32 %add, %b + %add2 = add nsw i32 %add, %c + %call = tail call i32 @callee2(i32 noundef %add1, i32 noundef %add2) + ret void +} + +define void @t6(i32 noundef %a, i32 noundef %b) #0 { +; CHECK-LABEL: t6: +; CHECK: # %bb.0: # %entry +; CHECK-NEXT: slli a2, a0, 2 +; CHECK-NEXT: sh2add a0, a0, a1 +; CHECK-NEXT: addi a0, a0, 42 +; CHECK-NEXT: mv a1, a2 +; CHECK-NEXT: mv a3, a2 +; CHECK-NEXT: tail callee +entry: + %shl = shl i32 %a, 2 + %add = add nsw i32 %shl, 42 + %add1 = add nsw i32 %add, %b + %call = tail call i32 @callee(i32 noundef %add1, i32 noundef %shl, i32 noundef %shl, i32 noundef %shl) + ret void +} + +define void @t7(i32 noundef %a, i32 noundef %b) #0 { +; CHECK-LABEL: t7: +; CHECK: # %bb.0: # %entry +; CHECK-NEXT: slli a0, a0, 2 +; CHECK-NEXT: addi a2, a0, 42 +; CHECK-NEXT: add a0, a2, a1 +; CHECK-NEXT: mv a1, a2 +; CHECK-NEXT: mv a3, a2 +; CHECK-NEXT: tail callee +entry: + %shl = shl i32 %a, 2 + %add = add nsw i32 %shl, 42 + %add1 = add nsw i32 %add, %b + %call = tail call i32 @callee(i32 noundef %add1, i32 noundef %add, i32 noundef %add, i32 noundef %add) + ret void +} + +define void @t8(i32 noundef %a, i32 noundef %b, i32 noundef %c, i32 noundef %d) #0 { +; CHECK-LABEL: t8: +; CHECK: # %bb.0: # %entry +; CHECK-NEXT: sh3add a2, a0, a2 +; CHECK-NEXT: sh3add a1, a0, a1 +; CHECK-NEXT: lui a4, 1 +; CHECK-NEXT: addi a4, a4, 1307 +; CHECK-NEXT: add a1, a1, a4 +; CHECK-NEXT: add a2, a2, a4 +; CHECK-NEXT: sh3add a3, a0, a3 +; CHECK-NEXT: mv a0, a1 +; CHECK-NEXT: tail callee +entry: + %shl = shl i32 %a, 3 + %add = add nsw i32 %shl, 5403 + %add1 = add nsw i32 %add, %b + %add3 = add nsw i32 %add, %c + %add5 = add nsw i32 %shl, %d + %call = tail call i32 @callee(i32 noundef %add1, i32 noundef %add1, i32 noundef %add3, i32 noundef %add5) + ret void +} + +define void @t9(i32 noundef %a, i32 noundef %b, i32 noundef %c, i32 noundef %d) #0 { +; CHECK-LABEL: t9: +; CHECK: # %bb.0: # %entry +; CHECK-NEXT: sh2add a2, a0, a2 +; CHECK-NEXT: sh2add a1, a0, a1 +; CHECK-NEXT: addi a1, a1, -42 +; CHECK-NEXT: addi a2, a2, -42 +; CHECK-NEXT: sh2add a3, a0, a3 +; CHECK-NEXT: mv a0, a1 +; CHECK-NEXT: tail callee +entry: + %shl = shl i32 %a, 2 + %add = add nsw i32 %shl, -42 + %add1 = add nsw i32 %add, %b + %add3 = add nsw i32 %add, %c + %add5 = add nsw i32 %shl, %d + %call = tail call i32 @callee(i32 noundef %add1, i32 noundef %add1, i32 noundef %add3, i32 noundef %add5) + ret void +} + +define void @t10(i32 noundef %a, i32 noundef %b, i32 noundef %c, i32 noundef %d) #0 { +; CHECK-LABEL: t10: +; CHECK: # %bb.0: # %entry +; CHECK-NEXT: tail callee +entry: + %shl = shl i32 %a, -2 + %add = add nsw i32 %shl, 42 + %add1 = add nsw i32 %add, %b + %add3 = add nsw i32 %add, %c + %add5 = add nsw i32 %shl, %d + %call = tail call i32 @callee(i32 noundef %add1, i32 noundef %add1, i32 noundef %add3, i32 noundef %add5) + ret void +} + +attributes #0 = { nounwind optsize }