-
Notifications
You must be signed in to change notification settings - Fork 15.3k
[RISCV] Add combine for shadd family of instructions. #130829
New issue
Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.
By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.
Already on GitHub? Sign in to your account
Changes from 7 commits
df5bcbf
38cfa79
b60ccb8
d452f5d
107d3d0
30e5845
373f2a8
c76c64e
a335458
c54ae87
File filter
Filter by extension
Conversations
Jump to
Diff view
Diff view
There are no files selected for viewing
| Original file line number | Diff line number | Diff line change |
|---|---|---|
|
|
@@ -29,6 +29,7 @@ | |
| #include "llvm/CodeGen/MachineInstrBuilder.h" | ||
| #include "llvm/CodeGen/MachineJumpTableInfo.h" | ||
| #include "llvm/CodeGen/MachineRegisterInfo.h" | ||
| #include "llvm/CodeGen/SDPatternMatch.h" | ||
| #include "llvm/CodeGen/SelectionDAGAddressAnalysis.h" | ||
| #include "llvm/CodeGen/TargetLoweringObjectFileImpl.h" | ||
| #include "llvm/CodeGen/ValueTypes.h" | ||
|
|
@@ -79,6 +80,12 @@ static cl::opt<int> | |
| "use for creating a floating-point immediate value"), | ||
| cl::init(2)); | ||
|
|
||
| static cl::opt<bool> | ||
| ReassocShlAddiAdd("reassoc-shl-addi-add", cl::Hidden, | ||
| cl::desc("Swap add and addi in cases where the add may " | ||
| "be combined with a shift"), | ||
| cl::init(true)); | ||
|
|
||
| RISCVTargetLowering::RISCVTargetLowering(const TargetMachine &TM, | ||
| const RISCVSubtarget &STI) | ||
| : TargetLowering(TM), Subtarget(STI) { | ||
|
|
@@ -14306,6 +14313,83 @@ static SDValue transformAddShlImm(SDNode *N, SelectionDAG &DAG, | |
| return DAG.getNode(ISD::SHL, DL, VT, SHADD, DAG.getConstant(Bits, DL, VT)); | ||
| } | ||
|
|
||
| // Check if this SDValue is an add immediate that is fed by a shift of 1, 2, | ||
| // or 3. | ||
| static bool checkAddiForShift(SDValue AddI, int64_t &AddConst, | ||
| int64_t &ShlConst) { | ||
| using namespace llvm::SDPatternMatch; | ||
| // Based on testing it seems that performance degrades if the ADDI has | ||
| // more than 2 uses. | ||
| if (AddI->use_size() > 2) | ||
| return false; | ||
|
|
||
| APInt AddVal; | ||
| SDValue SHLVal; | ||
| sd_match(AddI, m_Add(m_Value(SHLVal), m_ConstInt(AddVal))); | ||
|
|
||
| APInt VShift; | ||
| if (!sd_match(SHLVal, m_c_BinOp(ISD::SHL, m_Value(), m_ConstInt(VShift)))) | ||
mshockwave marked this conversation as resolved.
Outdated
Show resolved
Hide resolved
|
||
| return false; | ||
|
|
||
| if (VShift.slt(1) || VShift.sgt(3)) | ||
| return false; | ||
|
|
||
| // Set the values at the end when we know that the function will return | ||
| // true. | ||
| ShlConst = VShift.getSExtValue(); | ||
| AddConst = AddVal.getSExtValue(); | ||
| return true; | ||
| } | ||
|
|
||
| // Optimize (add (add (shl x, c0), c1), y) -> | ||
| // (ADDI (SH*ADD y, x), c1), if c0 equals to [1|2|3]. | ||
| static SDValue combineShlAddIAdd(SDNode *N, SelectionDAG &DAG, | ||
| const RISCVSubtarget &Subtarget) { | ||
| using namespace llvm::SDPatternMatch; | ||
|
|
||
| // Perform this optimization only in the zba extension. | ||
| if (!ReassocShlAddiAdd || !Subtarget.hasStdExtZba()) | ||
| return SDValue(); | ||
|
|
||
| // Skip for vector types and larger types. | ||
| EVT VT = N->getValueType(0); | ||
| if (VT != Subtarget.getXLenVT()) | ||
| return SDValue(); | ||
|
|
||
| // Looking for a reg-reg add and not an addi. | ||
| if (isa<ConstantSDNode>(N->getOperand(1))) | ||
| return SDValue(); | ||
|
|
||
| SDValue AddI = N->getOperand(0); | ||
| SDValue Other = N->getOperand(1); | ||
| bool LHSIsAddI = sd_match(AddI, m_Add(m_Value(), m_ConstInt())); | ||
topperc marked this conversation as resolved.
Outdated
Show resolved
Hide resolved
|
||
| bool RHSIsAddI = sd_match(Other, m_Add(m_Value(), m_ConstInt())); | ||
| int64_t AddConst = 0; | ||
| int64_t ShlConst = 0; | ||
|
|
||
| // At least one addi is required. | ||
| if (!LHSIsAddI && !RHSIsAddI) | ||
| return SDValue(); | ||
|
|
||
| // If the LHS is not the result of an add or both sides are results of an add, | ||
| // but the LHS does not have the desired structure with a shift, swap the | ||
| // operands. | ||
| if (!LHSIsAddI || (RHSIsAddI && !checkAddiForShift(AddI, AddConst, ShlConst))) | ||
mshockwave marked this conversation as resolved.
Outdated
Show resolved
Hide resolved
|
||
| std::swap(AddI, Other); | ||
|
|
||
| // We simply need to ensure AddI has the desired structure. | ||
| if (!checkAddiForShift(AddI, AddConst, ShlConst)) | ||
| return SDValue(); | ||
|
|
||
| SDValue SHLVal = AddI->getOperand(0); | ||
| SDLoc DL(N); | ||
|
|
||
| SDValue SHADD = DAG.getNode(RISCVISD::SHL_ADD, DL, VT, SHLVal->getOperand(0), | ||
topperc marked this conversation as resolved.
Outdated
Show resolved
Hide resolved
|
||
| DAG.getConstant(ShlConst, DL, VT), Other); | ||
| return DAG.getNode(ISD::ADD, DL, VT, SHADD, | ||
| DAG.getConstant(AddConst, DL, VT)); | ||
| } | ||
|
|
||
| // Combine a constant select operand into its use: | ||
| // | ||
| // (and (select cond, -1, c), x) | ||
|
|
@@ -14547,9 +14631,12 @@ static SDValue performADDCombine(SDNode *N, | |
| return V; | ||
| if (SDValue V = transformAddImmMulImm(N, DAG, Subtarget)) | ||
| return V; | ||
| if (!DCI.isBeforeLegalize() && !DCI.isCalledByLegalizer()) | ||
| if (!DCI.isBeforeLegalize() && !DCI.isCalledByLegalizer()) { | ||
| if (SDValue V = transformAddShlImm(N, DAG, Subtarget)) | ||
| return V; | ||
| if (SDValue V = combineShlAddIAdd(N, DAG, Subtarget)) | ||
|
Member
There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. I didn't look at what
Contributor
Author
There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. That's a good point. Running I do believe that this is the order in which they run. |
||
| return V; | ||
| } | ||
| if (SDValue V = combineBinOpToReduce(N, DAG, Subtarget)) | ||
| return V; | ||
| if (SDValue V = combineBinOpOfExtractToReduceTree(N, DAG, Subtarget)) | ||
|
|
||
| Original file line number | Diff line number | Diff line change |
|---|---|---|
| @@ -0,0 +1,133 @@ | ||
| ; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py UTC_ARGS: --version 5 | ||
| ; RUN: llc -mtriple=riscv32-unknown-elf -mattr=+zba %s -o - | FileCheck %s | ||
|
|
||
| declare i32 @callee1(i32 noundef) | ||
| declare i32 @callee2(i32 noundef, i32 noundef) | ||
| declare i32 @callee(i32 noundef, i32 noundef, i32 noundef, i32 noundef) | ||
|
|
||
| define void @t1(i32 noundef %a, i32 noundef %b, i32 noundef %c, i32 noundef %d) #0 { | ||
| ; CHECK-LABEL: t1: | ||
| ; CHECK: # %bb.0: # %entry | ||
| ; CHECK-NEXT: sh2add a2, a0, a2 | ||
| ; CHECK-NEXT: sh2add a1, a0, a1 | ||
| ; CHECK-NEXT: addi a1, a1, 45 | ||
| ; CHECK-NEXT: addi a2, a2, 45 | ||
| ; CHECK-NEXT: sh2add a3, a0, a3 | ||
| ; CHECK-NEXT: mv a0, a1 | ||
| ; CHECK-NEXT: tail callee | ||
| entry: | ||
| %shl = shl i32 %a, 2 | ||
| %add = add nsw i32 %shl, 45 | ||
| %add1 = add nsw i32 %add, %b | ||
| %add3 = add nsw i32 %add, %c | ||
| %add5 = add nsw i32 %shl, %d | ||
| %call = tail call i32 @callee(i32 noundef %add1, i32 noundef %add1, i32 noundef %add3, i32 noundef %add5) | ||
| ret void | ||
| } | ||
|
|
||
| define void @t2(i32 noundef %a, i32 noundef %b, i32 noundef %c) #0 { | ||
| ; CHECK-LABEL: t2: | ||
| ; CHECK: # %bb.0: # %entry | ||
| ; CHECK-NEXT: slli a0, a0, 2 | ||
| ; CHECK-NEXT: addi a5, a0, 42 | ||
| ; CHECK-NEXT: add a4, a5, a1 | ||
| ; CHECK-NEXT: add a3, a5, a2 | ||
| ; CHECK-NEXT: mv a1, a5 | ||
| ; CHECK-NEXT: mv a2, a4 | ||
| ; CHECK-NEXT: tail callee | ||
| entry: | ||
| %shl = shl i32 %a, 2 | ||
| %add = add nsw i32 %shl, 42 | ||
| %add4 = add nsw i32 %add, %b | ||
| %add7 = add nsw i32 %add, %c | ||
| %call = tail call i32 @callee(i32 noundef %shl, i32 noundef %add, i32 noundef %add4, i32 noundef %add7) | ||
| ret void | ||
| } | ||
|
|
||
| define void @t3(i32 noundef %a, i32 noundef %b, i32 noundef %c, i32 noundef %d, i32 noundef %e) #0 { | ||
| ; CHECK-LABEL: t3: | ||
| ; CHECK: # %bb.0: # %entry | ||
| ; CHECK-NEXT: slli a0, a0, 2 | ||
| ; CHECK-NEXT: addi a5, a0, 42 | ||
| ; CHECK-NEXT: add a0, a5, a1 | ||
| ; CHECK-NEXT: add a1, a5, a2 | ||
| ; CHECK-NEXT: add a2, a5, a3 | ||
| ; CHECK-NEXT: add a3, a5, a4 | ||
| ; CHECK-NEXT: tail callee | ||
| entry: | ||
| %shl = shl i32 %a, 2 | ||
| %add = add nsw i32 %shl, 42 | ||
| %add1 = add nsw i32 %add, %b | ||
| %add2 = add nsw i32 %add, %c | ||
| %add3 = add nsw i32 %add, %d | ||
| %add4 = add nsw i32 %add, %e | ||
| %call = tail call i32 @callee(i32 noundef %add1, i32 noundef %add2, i32 noundef %add3, i32 noundef %add4) | ||
| ret void | ||
| } | ||
|
|
||
| define void @t4(i32 noundef %a, i32 noundef %b) #0 { | ||
| ; CHECK-LABEL: t4: | ||
| ; CHECK: # %bb.0: # %entry | ||
| ; CHECK-NEXT: sh2add a0, a0, a1 | ||
| ; CHECK-NEXT: addi a0, a0, 42 | ||
| ; CHECK-NEXT: tail callee1 | ||
| entry: | ||
| %shl = shl i32 %a, 2 | ||
| %add = add nsw i32 %shl, 42 | ||
| %add1 = add nsw i32 %add, %b | ||
| %call = tail call i32 @callee1(i32 noundef %add1) | ||
| ret void | ||
| } | ||
|
|
||
| define void @t5(i32 noundef %a, i32 noundef %b, i32 noundef %c) #0 { | ||
| ; CHECK-LABEL: t5: | ||
| ; CHECK: # %bb.0: # %entry | ||
| ; CHECK-NEXT: sh2add a2, a0, a2 | ||
| ; CHECK-NEXT: sh2add a0, a0, a1 | ||
| ; CHECK-NEXT: addi a0, a0, 42 | ||
| ; CHECK-NEXT: addi a1, a2, 42 | ||
| ; CHECK-NEXT: tail callee2 | ||
| entry: | ||
| %shl = shl i32 %a, 2 | ||
| %add = add nsw i32 %shl, 42 | ||
| %add1 = add nsw i32 %add, %b | ||
| %add2 = add nsw i32 %add, %c | ||
| %call = tail call i32 @callee2(i32 noundef %add1, i32 noundef %add2) | ||
| ret void | ||
| } | ||
|
|
||
| define void @t6(i32 noundef %a, i32 noundef %b) #0 { | ||
| ; CHECK-LABEL: t6: | ||
| ; CHECK: # %bb.0: # %entry | ||
| ; CHECK-NEXT: slli a2, a0, 2 | ||
| ; CHECK-NEXT: sh2add a0, a0, a1 | ||
| ; CHECK-NEXT: addi a0, a0, 42 | ||
| ; CHECK-NEXT: mv a1, a2 | ||
| ; CHECK-NEXT: mv a3, a2 | ||
| ; CHECK-NEXT: tail callee | ||
| entry: | ||
| %shl = shl i32 %a, 2 | ||
| %add = add nsw i32 %shl, 42 | ||
| %add1 = add nsw i32 %add, %b | ||
| %call = tail call i32 @callee(i32 noundef %add1, i32 noundef %shl, i32 noundef %shl, i32 noundef %shl) | ||
| ret void | ||
| } | ||
|
|
||
| define void @t7(i32 noundef %a, i32 noundef %b) #0 { | ||
| ; CHECK-LABEL: t7: | ||
| ; CHECK: # %bb.0: # %entry | ||
| ; CHECK-NEXT: slli a0, a0, 2 | ||
| ; CHECK-NEXT: addi a2, a0, 42 | ||
| ; CHECK-NEXT: add a0, a2, a1 | ||
| ; CHECK-NEXT: mv a1, a2 | ||
| ; CHECK-NEXT: mv a3, a2 | ||
| ; CHECK-NEXT: tail callee | ||
| entry: | ||
| %shl = shl i32 %a, 2 | ||
| %add = add nsw i32 %shl, 42 | ||
| %add1 = add nsw i32 %add, %b | ||
| %call = tail call i32 @callee(i32 noundef %add1, i32 noundef %add, i32 noundef %add, i32 noundef %add) | ||
| ret void | ||
| } | ||
|
|
||
| attributes #0 = { nounwind optsize } |
Uh oh!
There was an error while loading. Please reload this page.