Skip to content

Commit df5bcbf

Browse files
[RISCV] Add combine for shadd family of instructions.
For example, in the following situation:
  %6:gpr = SLLI %2:gpr, 2
  %7:gpr = ADDI killed %6:gpr, 24
  %8:gpr = ADD %0:gpr, %7:gpr
If we swap the two add instructions, we can merge the shift and the add. The final code will look something like this:
  %7 = SH2ADD %0, %2
  %8 = ADDI %7, 24
1 parent f3e5594 commit df5bcbf

File tree

2 files changed

+204
-1
lines changed

2 files changed

+204
-1
lines changed

llvm/lib/Target/RISCV/RISCVISelLowering.cpp

Lines changed: 91 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -79,6 +79,12 @@ static cl::opt<int>
7979
"use for creating a floating-point immediate value"),
8080
cl::init(2));
8181

82+
// Hidden command-line switch, enabled by default. combineShlAddIAdd returns
// without doing anything when it is turned off.
static cl::opt<bool> ReassocShlAddiAdd(
    "reassoc-shl-addi-add", cl::Hidden,
    cl::desc("Swap add and addi in cases where the add may "
             "be combined with a shift"),
    cl::init(true));
87+
8288
RISCVTargetLowering::RISCVTargetLowering(const TargetMachine &TM,
8389
const RISCVSubtarget &STI)
8490
: TargetLowering(TM), Subtarget(STI) {
@@ -14306,6 +14312,87 @@ static SDValue transformAddShlImm(SDNode *N, SelectionDAG &DAG,
1430614312
return DAG.getNode(ISD::SHL, DL, VT, SHADD, DAG.getConstant(Bits, DL, VT));
1430714313
}
1430814314

14315+
// Check if this SDValue is an add immediate and then
14316+
static bool checkAddiForShift(SDValue AddI) {
14317+
// Based on testing it seems that performance degrades if the ADDI has
14318+
// more than 2 uses.
14319+
if (AddI->use_size() > 2)
14320+
return false;
14321+
14322+
ConstantSDNode *AddConst = dyn_cast<ConstantSDNode>(AddI->getOperand(1));
14323+
if (!AddConst)
14324+
return false;
14325+
14326+
SDValue SHLVal = AddI->getOperand(0);
14327+
if (SHLVal->getOpcode() != ISD::SHL)
14328+
return false;
14329+
14330+
ConstantSDNode *ShiftNode = dyn_cast<ConstantSDNode>(SHLVal->getOperand(1));
14331+
if (!ShiftNode)
14332+
return false;
14333+
14334+
auto ShiftVal = ShiftNode->getSExtValue();
14335+
if (ShiftVal != 1 && ShiftVal != 2 && ShiftVal != 3)
14336+
return false;
14337+
14338+
return true;
14339+
}
14340+
14341+
// Optimize (add (add (shl x, c0), c1), y) ->
14342+
// (ADDI (SH*ADD y, x), c1), if c0 equals to [1|2|3].
14343+
static SDValue combineShlAddIAdd(SDNode *N, SelectionDAG &DAG,
14344+
const RISCVSubtarget &Subtarget) {
14345+
if (!ReassocShlAddiAdd)
14346+
return SDValue();
14347+
14348+
// Perform this optimization only in the zba extension.
14349+
if (!Subtarget.hasStdExtZba())
14350+
return SDValue();
14351+
14352+
// Skip for vector types and larger types.
14353+
EVT VT = N->getValueType(0);
14354+
if (VT.isVector() || VT.getSizeInBits() > Subtarget.getXLen())
14355+
return SDValue();
14356+
14357+
// Looking for a reg-reg add and not an addi.
14358+
auto *Op1 = dyn_cast<ConstantSDNode>(N->getOperand(1));
14359+
if (Op1)
14360+
return SDValue();
14361+
SDValue AddI;
14362+
SDValue Other;
14363+
14364+
if (N->getOperand(0)->getOpcode() == ISD::ADD &&
14365+
N->getOperand(1)->getOpcode() == ISD::ADD) {
14366+
AddI = N->getOperand(0);
14367+
Other = N->getOperand(1);
14368+
if (!checkAddiForShift(AddI)) {
14369+
AddI = N->getOperand(1);
14370+
Other = N->getOperand(0);
14371+
}
14372+
} else if (N->getOperand(0)->getOpcode() == ISD::ADD) {
14373+
AddI = N->getOperand(0);
14374+
Other = N->getOperand(1);
14375+
} else if (N->getOperand(1)->getOpcode() == ISD::ADD) {
14376+
AddI = N->getOperand(1);
14377+
Other = N->getOperand(0);
14378+
} else
14379+
return SDValue();
14380+
14381+
if (!checkAddiForShift(AddI))
14382+
return SDValue();
14383+
14384+
auto *AddConst = dyn_cast<ConstantSDNode>(AddI->getOperand(1));
14385+
SDValue SHLVal = AddI->getOperand(0);
14386+
auto *ShiftNode = dyn_cast<ConstantSDNode>(SHLVal->getOperand(1));
14387+
auto ShiftVal = ShiftNode->getSExtValue();
14388+
SDLoc DL(N);
14389+
14390+
SDValue SHADD = DAG.getNode(RISCVISD::SHL_ADD, DL, VT, SHLVal->getOperand(0),
14391+
DAG.getConstant(ShiftVal, DL, VT), Other);
14392+
return DAG.getNode(ISD::ADD, DL, VT, SHADD,
14393+
DAG.getConstant(AddConst->getSExtValue(), DL, VT));
14394+
}
14395+
1430914396
// Combine a constant select operand into its use:
1431014397
//
1431114398
// (and (select cond, -1, c), x)
@@ -14547,9 +14634,12 @@ static SDValue performADDCombine(SDNode *N,
1454714634
return V;
1454814635
if (SDValue V = transformAddImmMulImm(N, DAG, Subtarget))
1454914636
return V;
14550-
if (!DCI.isBeforeLegalize() && !DCI.isCalledByLegalizer())
14637+
if (!DCI.isBeforeLegalize() && !DCI.isCalledByLegalizer()) {
1455114638
if (SDValue V = transformAddShlImm(N, DAG, Subtarget))
1455214639
return V;
14640+
if (SDValue V = combineShlAddIAdd(N, DAG, Subtarget))
14641+
return V;
14642+
}
1455314643
if (SDValue V = combineBinOpToReduce(N, DAG, Subtarget))
1455414644
return V;
1455514645
if (SDValue V = combineBinOpOfExtractToReduceTree(N, DAG, Subtarget))
Lines changed: 113 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,113 @@
1+
; RUN: llc -mtriple=riscv32-pc-unknown-gnu -mattr=+zba %s -o - | FileCheck %s
2+
3+
declare dso_local i32 @callee1(i32 noundef) local_unnamed_addr
4+
declare dso_local i32 @callee2(i32 noundef, i32 noundef) local_unnamed_addr
5+
declare dso_local i32 @callee(i32 noundef, i32 noundef, i32 noundef, i32 noundef) local_unnamed_addr
6+
7+
; CHECK-LABEL: t1:
8+
; CHECK: sh2add
9+
; CHECK: sh2add
10+
; CHECK: sh2add
11+
; CHECK: tail callee
12+
13+
; t1: %add (shl + constant) has two users, %add1 and %add3, and %shl is also
; added directly to %d. The CHECK lines for this function expect three sh2add.
define dso_local void @t1(i32 noundef %a, i32 noundef %b, i32 noundef %c, i32 noundef %d) local_unnamed_addr #0 {
14+
entry:
15+
%shl = shl i32 %a, 2
16+
%add = add nsw i32 %shl, 45
17+
%add1 = add nsw i32 %add, %b
18+
%add3 = add nsw i32 %add, %c
19+
%add5 = add nsw i32 %shl, %d
20+
%call = tail call i32 @callee(i32 noundef %add1, i32 noundef %add1, i32 noundef %add3, i32 noundef %add5)
21+
ret void
22+
}
23+
24+
; CHECK-LABEL: t2:
25+
; CHECK: slli
26+
; CHECK-NOT: sh2add
27+
; CHECK: tail callee
28+
29+
; t2: %add is used three times (by %add4, by %add7 and as a call argument),
; which is more than the two uses the combine allows. The CHECK lines for this
; function expect a plain slli and no sh2add.
define dso_local void @t2(i32 noundef %a, i32 noundef %b, i32 noundef %c) local_unnamed_addr #0 {
30+
entry:
31+
%shl = shl i32 %a, 2
32+
%add = add nsw i32 %shl, 42
33+
%add4 = add nsw i32 %add, %b
34+
%add7 = add nsw i32 %add, %c
35+
%call = tail call i32 @callee(i32 noundef %shl, i32 noundef %add, i32 noundef %add4, i32 noundef %add7)
36+
ret void
37+
}
38+
39+
; CHECK-LABEL: t3:
40+
; CHECK: slli
41+
; CHECK-NOT: sh2add
42+
; CHECK: tail callee
43+
44+
; t3: %add feeds four separate adds (%add1 through %add4), which is more than
; the two uses the combine allows. No sh2add is expected.
define dso_local void @t3(i32 noundef %a, i32 noundef %b, i32 noundef %c, i32 noundef %d, i32 noundef %e) local_unnamed_addr #0 {
45+
entry:
46+
%shl = shl i32 %a, 2
47+
%add = add nsw i32 %shl, 42
48+
%add1 = add nsw i32 %add, %b
49+
%add2 = add nsw i32 %add, %c
50+
%add3 = add nsw i32 %add, %d
51+
%add4 = add nsw i32 %add, %e
52+
%call = tail call i32 @callee(i32 noundef %add1, i32 noundef %add2, i32 noundef %add3, i32 noundef %add4)
53+
ret void
54+
}
55+
56+
; CHECK-LABEL: t4:
57+
; CHECK: sh2add
58+
; CHECK-NEXT: addi
59+
; CHECK-NEXT: tail callee1
60+
61+
; t4: the basic case. %add has a single user (%add1). The CHECK lines for
; this function expect sh2add immediately followed by addi and the tail call.
define dso_local void @t4(i32 noundef %a, i32 noundef %b) local_unnamed_addr #0 {
62+
entry:
63+
%shl = shl i32 %a, 2
64+
%add = add nsw i32 %shl, 42
65+
%add1 = add nsw i32 %add, %b
66+
%call = tail call i32 @callee1(i32 noundef %add1)
67+
ret void
68+
}
69+
70+
; CHECK-LABEL: t5:
71+
; CHECK: sh2add
72+
; CHECK: sh2add
73+
; CHECK: tail callee2
74+
75+
; t5: %add has exactly two users (%add1 and %add2), which is within the
; two-use limit. The CHECK lines for this function expect two sh2add.
define dso_local void @t5(i32 noundef %a, i32 noundef %b, i32 noundef %c) local_unnamed_addr #0 {
76+
entry:
77+
%shl = shl i32 %a, 2
78+
%add = add nsw i32 %shl, 42
79+
%add1 = add nsw i32 %add, %b
80+
%add2 = add nsw i32 %add, %c
81+
%call = tail call i32 @callee2(i32 noundef %add1, i32 noundef %add2)
82+
ret void
83+
}
84+
85+
; CHECK-LABEL: t6:
86+
; CHECK-DAG: sh2add
87+
; CHECK-DAG: slli
88+
; CHECK: tail callee
89+
90+
; t6: %add has a single user (%add1), but %shl itself is also passed to the
; callee three times. The CHECK-DAG lines for this function expect both an
; sh2add and an slli, in either order.
define dso_local void @t6(i32 noundef %a, i32 noundef %b) local_unnamed_addr #0 {
91+
entry:
92+
%shl = shl i32 %a, 2
93+
%add = add nsw i32 %shl, 42
94+
%add1 = add nsw i32 %add, %b
95+
%call = tail call i32 @callee(i32 noundef %add1, i32 noundef %shl, i32 noundef %shl, i32 noundef %shl)
96+
ret void
97+
}
98+
99+
; CHECK-LABEL: t7:
100+
; CHECK: slli
101+
; CHECK-NOT: sh2add
102+
; CHECK: tail callee
103+
104+
; t7: %add is used by %add1 and is also passed to the callee three times.
; The CHECK lines for this function expect a plain slli and no sh2add.
define dso_local void @t7(i32 noundef %a, i32 noundef %b) local_unnamed_addr #0 {
105+
entry:
106+
%shl = shl i32 %a, 2
107+
%add = add nsw i32 %shl, 42
108+
%add1 = add nsw i32 %add, %b
109+
%call = tail call i32 @callee(i32 noundef %add1, i32 noundef %add, i32 noundef %add, i32 noundef %add)
110+
ret void
111+
}
112+
113+
attributes #0 = { nounwind optsize }

0 commit comments

Comments
 (0)