Skip to content

Commit e0aec1f

Browse files
authored
[RISCV] For (2^N +/- 2^M) muls, prefer ADD to SUB (llvm#166757)
This changes the expansion of muls by `3 << C` from `(X << (C + 2)) - (X << C)` to `(X << (C + 1)) + (X << C)`. If Zba is available, the output is not affected, as we emit `(shl (sh1add X, X), C)` instead. There are two advantages: (1) ADD is more compressible; (2) the instruction count is often reduced, by the heuristic that `(X << (C + 1))` is more likely to have another use than `(X << (C + 2))`.
1 parent 09122fe commit e0aec1f

File tree

12 files changed

+742
-772
lines changed

12 files changed

+742
-772
lines changed

llvm/lib/Target/RISCV/RISCVISelLowering.cpp

Lines changed: 19 additions & 6 deletions
Original file line numberDiff line numberDiff line change
@@ -16791,22 +16791,35 @@ static SDValue expandMulToNAFSequence(SDNode *N, SelectionDAG &DAG,
1679116791
static SDValue expandMulToAddOrSubOfShl(SDNode *N, SelectionDAG &DAG,
1679216792
uint64_t MulAmt) {
1679316793
uint64_t MulAmtLowBit = MulAmt & (-MulAmt);
16794+
SDValue X = N->getOperand(0);
1679416795
ISD::NodeType Op;
1679516796
uint64_t ShiftAmt1;
16796-
if (isPowerOf2_64(MulAmt + MulAmtLowBit)) {
16797-
Op = ISD::SUB;
16798-
ShiftAmt1 = MulAmt + MulAmtLowBit;
16799-
} else if (isPowerOf2_64(MulAmt - MulAmtLowBit)) {
16797+
bool CanSub = isPowerOf2_64(MulAmt + MulAmtLowBit);
16798+
auto PreferSub = [X, MulAmtLowBit]() {
16799+
// For MulAmt == 3 << M both (X << M + 2) - (X << M)
16800+
// and (X << M + 1) + (X << M) are valid expansions.
16801+
// Prefer SUB if we can get (X << M + 2) for free,
16802+
// because X is exact (Y >> M + 2).
16803+
uint64_t ShAmt = Log2_64(MulAmtLowBit) + 2;
16804+
using namespace SDPatternMatch;
16805+
return sd_match(X, m_AnyOf(m_Sra(m_Value(), m_SpecificInt(ShAmt)),
16806+
m_Srl(m_Value(), m_SpecificInt(ShAmt)))) &&
16807+
X->getFlags().hasExact();
16808+
};
16809+
if (isPowerOf2_64(MulAmt - MulAmtLowBit) && !(CanSub && PreferSub())) {
1680016810
Op = ISD::ADD;
1680116811
ShiftAmt1 = MulAmt - MulAmtLowBit;
16812+
} else if (CanSub) {
16813+
Op = ISD::SUB;
16814+
ShiftAmt1 = MulAmt + MulAmtLowBit;
1680216815
} else {
1680316816
return SDValue();
1680416817
}
1680516818
EVT VT = N->getValueType(0);
1680616819
SDLoc DL(N);
16807-
SDValue Shift1 = DAG.getNode(ISD::SHL, DL, VT, N->getOperand(0),
16820+
SDValue Shift1 = DAG.getNode(ISD::SHL, DL, VT, X,
1680816821
DAG.getConstant(Log2_64(ShiftAmt1), DL, VT));
16809-
SDValue Shift2 = DAG.getNode(ISD::SHL, DL, VT, N->getOperand(0),
16822+
SDValue Shift2 = DAG.getNode(ISD::SHL, DL, VT, X,
1681016823
DAG.getConstant(Log2_64(MulAmtLowBit), DL, VT));
1681116824
return DAG.getNode(Op, DL, VT, Shift1, Shift2);
1681216825
}

llvm/test/CodeGen/RISCV/mul.ll

Lines changed: 28 additions & 28 deletions
Original file line numberDiff line numberDiff line change
@@ -1185,29 +1185,29 @@ define i32 @muli32_p384(i32 %a) nounwind {
11851185
; RV32I-LABEL: muli32_p384:
11861186
; RV32I: # %bb.0:
11871187
; RV32I-NEXT: slli a1, a0, 7
1188-
; RV32I-NEXT: slli a0, a0, 9
1189-
; RV32I-NEXT: sub a0, a0, a1
1188+
; RV32I-NEXT: slli a0, a0, 8
1189+
; RV32I-NEXT: add a0, a0, a1
11901190
; RV32I-NEXT: ret
11911191
;
11921192
; RV32IM-LABEL: muli32_p384:
11931193
; RV32IM: # %bb.0:
11941194
; RV32IM-NEXT: slli a1, a0, 7
1195-
; RV32IM-NEXT: slli a0, a0, 9
1196-
; RV32IM-NEXT: sub a0, a0, a1
1195+
; RV32IM-NEXT: slli a0, a0, 8
1196+
; RV32IM-NEXT: add a0, a0, a1
11971197
; RV32IM-NEXT: ret
11981198
;
11991199
; RV64I-LABEL: muli32_p384:
12001200
; RV64I: # %bb.0:
12011201
; RV64I-NEXT: slli a1, a0, 7
1202-
; RV64I-NEXT: slli a0, a0, 9
1203-
; RV64I-NEXT: sub a0, a0, a1
1202+
; RV64I-NEXT: slli a0, a0, 8
1203+
; RV64I-NEXT: add a0, a0, a1
12041204
; RV64I-NEXT: ret
12051205
;
12061206
; RV64IM-LABEL: muli32_p384:
12071207
; RV64IM: # %bb.0:
12081208
; RV64IM-NEXT: slli a1, a0, 7
1209-
; RV64IM-NEXT: slli a0, a0, 9
1210-
; RV64IM-NEXT: subw a0, a0, a1
1209+
; RV64IM-NEXT: slli a0, a0, 8
1210+
; RV64IM-NEXT: addw a0, a0, a1
12111211
; RV64IM-NEXT: ret
12121212
%1 = mul i32 %a, 384
12131213
ret i32 %1
@@ -1217,29 +1217,29 @@ define i32 @muli32_p12288(i32 %a) nounwind {
12171217
; RV32I-LABEL: muli32_p12288:
12181218
; RV32I: # %bb.0:
12191219
; RV32I-NEXT: slli a1, a0, 12
1220-
; RV32I-NEXT: slli a0, a0, 14
1221-
; RV32I-NEXT: sub a0, a0, a1
1220+
; RV32I-NEXT: slli a0, a0, 13
1221+
; RV32I-NEXT: add a0, a0, a1
12221222
; RV32I-NEXT: ret
12231223
;
12241224
; RV32IM-LABEL: muli32_p12288:
12251225
; RV32IM: # %bb.0:
12261226
; RV32IM-NEXT: slli a1, a0, 12
1227-
; RV32IM-NEXT: slli a0, a0, 14
1228-
; RV32IM-NEXT: sub a0, a0, a1
1227+
; RV32IM-NEXT: slli a0, a0, 13
1228+
; RV32IM-NEXT: add a0, a0, a1
12291229
; RV32IM-NEXT: ret
12301230
;
12311231
; RV64I-LABEL: muli32_p12288:
12321232
; RV64I: # %bb.0:
12331233
; RV64I-NEXT: slli a1, a0, 12
1234-
; RV64I-NEXT: slli a0, a0, 14
1235-
; RV64I-NEXT: sub a0, a0, a1
1234+
; RV64I-NEXT: slli a0, a0, 13
1235+
; RV64I-NEXT: add a0, a0, a1
12361236
; RV64I-NEXT: ret
12371237
;
12381238
; RV64IM-LABEL: muli32_p12288:
12391239
; RV64IM: # %bb.0:
12401240
; RV64IM-NEXT: slli a1, a0, 12
1241-
; RV64IM-NEXT: slli a0, a0, 14
1242-
; RV64IM-NEXT: subw a0, a0, a1
1241+
; RV64IM-NEXT: slli a0, a0, 13
1242+
; RV64IM-NEXT: addw a0, a0, a1
12431243
; RV64IM-NEXT: ret
12441244
%1 = mul i32 %a, 12288
12451245
ret i32 %1
@@ -2117,14 +2117,14 @@ define i64 @muland_demand(i64 %x) nounwind {
21172117
; RV32IM: # %bb.0:
21182118
; RV32IM-NEXT: andi a0, a0, -8
21192119
; RV32IM-NEXT: slli a2, a1, 2
2120-
; RV32IM-NEXT: slli a1, a1, 4
2121-
; RV32IM-NEXT: sub a1, a1, a2
2120+
; RV32IM-NEXT: slli a1, a1, 3
2121+
; RV32IM-NEXT: add a1, a1, a2
21222122
; RV32IM-NEXT: li a2, 12
21232123
; RV32IM-NEXT: mulhu a2, a0, a2
21242124
; RV32IM-NEXT: add a1, a2, a1
21252125
; RV32IM-NEXT: slli a2, a0, 2
2126-
; RV32IM-NEXT: slli a0, a0, 4
2127-
; RV32IM-NEXT: sub a0, a0, a2
2126+
; RV32IM-NEXT: slli a0, a0, 3
2127+
; RV32IM-NEXT: add a0, a0, a2
21282128
; RV32IM-NEXT: ret
21292129
;
21302130
; RV64I-LABEL: muland_demand:
@@ -2133,16 +2133,16 @@ define i64 @muland_demand(i64 %x) nounwind {
21332133
; RV64I-NEXT: srli a1, a1, 2
21342134
; RV64I-NEXT: and a0, a0, a1
21352135
; RV64I-NEXT: slli a1, a0, 2
2136-
; RV64I-NEXT: slli a0, a0, 4
2137-
; RV64I-NEXT: sub a0, a0, a1
2136+
; RV64I-NEXT: slli a0, a0, 3
2137+
; RV64I-NEXT: add a0, a0, a1
21382138
; RV64I-NEXT: ret
21392139
;
21402140
; RV64IM-LABEL: muland_demand:
21412141
; RV64IM: # %bb.0:
21422142
; RV64IM-NEXT: andi a0, a0, -8
21432143
; RV64IM-NEXT: slli a1, a0, 2
2144-
; RV64IM-NEXT: slli a0, a0, 4
2145-
; RV64IM-NEXT: sub a0, a0, a1
2144+
; RV64IM-NEXT: slli a0, a0, 3
2145+
; RV64IM-NEXT: add a0, a0, a1
21462146
; RV64IM-NEXT: ret
21472147
%and = and i64 %x, 4611686018427387896
21482148
%mul = mul i64 %and, 12
@@ -2171,15 +2171,15 @@ define i64 @mulzext_demand(i32 signext %x) nounwind {
21712171
; RV64I-LABEL: mulzext_demand:
21722172
; RV64I: # %bb.0:
21732173
; RV64I-NEXT: slli a1, a0, 32
2174-
; RV64I-NEXT: slli a0, a0, 34
2175-
; RV64I-NEXT: sub a0, a0, a1
2174+
; RV64I-NEXT: slli a0, a0, 33
2175+
; RV64I-NEXT: add a0, a0, a1
21762176
; RV64I-NEXT: ret
21772177
;
21782178
; RV64IM-LABEL: mulzext_demand:
21792179
; RV64IM: # %bb.0:
21802180
; RV64IM-NEXT: slli a1, a0, 32
2181-
; RV64IM-NEXT: slli a0, a0, 34
2182-
; RV64IM-NEXT: sub a0, a0, a1
2181+
; RV64IM-NEXT: slli a0, a0, 33
2182+
; RV64IM-NEXT: add a0, a0, a1
21832183
; RV64IM-NEXT: ret
21842184
%ext = zext i32 %x to i64
21852185
%mul = mul i64 %ext, 12884901888

llvm/test/CodeGen/RISCV/pr145360.ll

Lines changed: 8 additions & 8 deletions
Original file line numberDiff line numberDiff line change
@@ -27,11 +27,11 @@ define i32 @unsigned(i32 %0, ptr %1) {
2727
; CHECK-NEXT: slli a4, a3, 32
2828
; CHECK-NEXT: mulhu a2, a2, a4
2929
; CHECK-NEXT: srli a2, a2, 36
30-
; CHECK-NEXT: slli a4, a2, 5
31-
; CHECK-NEXT: slli a2, a2, 3
32-
; CHECK-NEXT: sub a2, a2, a4
30+
; CHECK-NEXT: slli a4, a2, 3
31+
; CHECK-NEXT: slli a2, a2, 4
32+
; CHECK-NEXT: add a2, a2, a4
3333
; CHECK-NEXT: srliw a4, a0, 3
34-
; CHECK-NEXT: add a2, a0, a2
34+
; CHECK-NEXT: sub a2, a0, a2
3535
; CHECK-NEXT: mulw a0, a4, a3
3636
; CHECK-NEXT: sw a2, 0(a1)
3737
; CHECK-NEXT: ret
@@ -68,10 +68,10 @@ define i32 @unsigned_div_first(i32 %0, ptr %1) {
6868
; CHECK-NEXT: slli a3, a3, 32
6969
; CHECK-NEXT: mulhu a2, a2, a3
7070
; CHECK-NEXT: srli a2, a2, 36
71-
; CHECK-NEXT: slli a3, a2, 5
72-
; CHECK-NEXT: slli a4, a2, 3
73-
; CHECK-NEXT: sub a4, a4, a3
74-
; CHECK-NEXT: add a0, a0, a4
71+
; CHECK-NEXT: slli a3, a2, 3
72+
; CHECK-NEXT: slli a4, a2, 4
73+
; CHECK-NEXT: add a3, a4, a3
74+
; CHECK-NEXT: sub a0, a0, a3
7575
; CHECK-NEXT: sw a0, 0(a1)
7676
; CHECK-NEXT: mv a0, a2
7777
; CHECK-NEXT: ret

llvm/test/CodeGen/RISCV/rv32xtheadba.ll

Lines changed: 8 additions & 8 deletions
Original file line numberDiff line numberDiff line change
@@ -98,8 +98,8 @@ define i32 @addmul6(i32 %a, i32 %b) {
9898
; RV32I-LABEL: addmul6:
9999
; RV32I: # %bb.0:
100100
; RV32I-NEXT: slli a2, a0, 1
101-
; RV32I-NEXT: slli a0, a0, 3
102-
; RV32I-NEXT: sub a0, a0, a2
101+
; RV32I-NEXT: slli a0, a0, 2
102+
; RV32I-NEXT: add a0, a0, a2
103103
; RV32I-NEXT: add a0, a0, a1
104104
; RV32I-NEXT: ret
105105
;
@@ -136,8 +136,8 @@ define i32 @addmul12(i32 %a, i32 %b) {
136136
; RV32I-LABEL: addmul12:
137137
; RV32I: # %bb.0:
138138
; RV32I-NEXT: slli a2, a0, 2
139-
; RV32I-NEXT: slli a0, a0, 4
140-
; RV32I-NEXT: sub a0, a0, a2
139+
; RV32I-NEXT: slli a0, a0, 3
140+
; RV32I-NEXT: add a0, a0, a2
141141
; RV32I-NEXT: add a0, a0, a1
142142
; RV32I-NEXT: ret
143143
;
@@ -193,8 +193,8 @@ define i32 @addmul24(i32 %a, i32 %b) {
193193
; RV32I-LABEL: addmul24:
194194
; RV32I: # %bb.0:
195195
; RV32I-NEXT: slli a2, a0, 3
196-
; RV32I-NEXT: slli a0, a0, 5
197-
; RV32I-NEXT: sub a0, a0, a2
196+
; RV32I-NEXT: slli a0, a0, 4
197+
; RV32I-NEXT: add a0, a0, a2
198198
; RV32I-NEXT: add a0, a0, a1
199199
; RV32I-NEXT: ret
200200
;
@@ -269,8 +269,8 @@ define i32 @mul96(i32 %a) {
269269
; RV32I-LABEL: mul96:
270270
; RV32I: # %bb.0:
271271
; RV32I-NEXT: slli a1, a0, 5
272-
; RV32I-NEXT: slli a0, a0, 7
273-
; RV32I-NEXT: sub a0, a0, a1
272+
; RV32I-NEXT: slli a0, a0, 6
273+
; RV32I-NEXT: add a0, a0, a1
274274
; RV32I-NEXT: ret
275275
;
276276
; RV32XTHEADBA-LABEL: mul96:

llvm/test/CodeGen/RISCV/rv32zba.ll

Lines changed: 10 additions & 10 deletions
Original file line numberDiff line numberDiff line change
@@ -85,8 +85,8 @@ define i32 @addmul6(i32 %a, i32 %b) {
8585
; RV32I-LABEL: addmul6:
8686
; RV32I: # %bb.0:
8787
; RV32I-NEXT: slli a2, a0, 1
88-
; RV32I-NEXT: slli a0, a0, 3
89-
; RV32I-NEXT: sub a0, a0, a2
88+
; RV32I-NEXT: slli a0, a0, 2
89+
; RV32I-NEXT: add a0, a0, a2
9090
; RV32I-NEXT: add a0, a0, a1
9191
; RV32I-NEXT: ret
9292
;
@@ -135,8 +135,8 @@ define i32 @addmul12(i32 %a, i32 %b) {
135135
; RV32I-LABEL: addmul12:
136136
; RV32I: # %bb.0:
137137
; RV32I-NEXT: slli a2, a0, 2
138-
; RV32I-NEXT: slli a0, a0, 4
139-
; RV32I-NEXT: sub a0, a0, a2
138+
; RV32I-NEXT: slli a0, a0, 3
139+
; RV32I-NEXT: add a0, a0, a2
140140
; RV32I-NEXT: add a0, a0, a1
141141
; RV32I-NEXT: ret
142142
;
@@ -210,8 +210,8 @@ define i32 @addmul24(i32 %a, i32 %b) {
210210
; RV32I-LABEL: addmul24:
211211
; RV32I: # %bb.0:
212212
; RV32I-NEXT: slli a2, a0, 3
213-
; RV32I-NEXT: slli a0, a0, 5
214-
; RV32I-NEXT: sub a0, a0, a2
213+
; RV32I-NEXT: slli a0, a0, 4
214+
; RV32I-NEXT: add a0, a0, a2
215215
; RV32I-NEXT: add a0, a0, a1
216216
; RV32I-NEXT: ret
217217
;
@@ -310,8 +310,8 @@ define i32 @mul96(i32 %a) {
310310
; RV32I-LABEL: mul96:
311311
; RV32I: # %bb.0:
312312
; RV32I-NEXT: slli a1, a0, 5
313-
; RV32I-NEXT: slli a0, a0, 7
314-
; RV32I-NEXT: sub a0, a0, a1
313+
; RV32I-NEXT: slli a0, a0, 6
314+
; RV32I-NEXT: add a0, a0, a1
315315
; RV32I-NEXT: ret
316316
;
317317
; RV32ZBA-LABEL: mul96:
@@ -1272,8 +1272,8 @@ define ptr @shl_add_knownbits(ptr %p, i32 %i) {
12721272
; RV32I-NEXT: slli a1, a1, 18
12731273
; RV32I-NEXT: srli a1, a1, 18
12741274
; RV32I-NEXT: slli a2, a1, 1
1275-
; RV32I-NEXT: slli a1, a1, 3
1276-
; RV32I-NEXT: sub a1, a1, a2
1275+
; RV32I-NEXT: slli a1, a1, 2
1276+
; RV32I-NEXT: add a1, a1, a2
12771277
; RV32I-NEXT: srli a1, a1, 3
12781278
; RV32I-NEXT: add a0, a0, a1
12791279
; RV32I-NEXT: ret

llvm/test/CodeGen/RISCV/rv64xtheadba.ll

Lines changed: 12 additions & 12 deletions
Original file line numberDiff line numberDiff line change
@@ -94,8 +94,8 @@ define i64 @addmul6(i64 %a, i64 %b) {
9494
; RV64I-LABEL: addmul6:
9595
; RV64I: # %bb.0:
9696
; RV64I-NEXT: slli a2, a0, 1
97-
; RV64I-NEXT: slli a0, a0, 3
98-
; RV64I-NEXT: sub a0, a0, a2
97+
; RV64I-NEXT: slli a0, a0, 2
98+
; RV64I-NEXT: add a0, a0, a2
9999
; RV64I-NEXT: add a0, a0, a1
100100
; RV64I-NEXT: ret
101101
;
@@ -113,8 +113,8 @@ define i64 @disjointormul6(i64 %a, i64 %b) {
113113
; RV64I-LABEL: disjointormul6:
114114
; RV64I: # %bb.0:
115115
; RV64I-NEXT: slli a2, a0, 1
116-
; RV64I-NEXT: slli a0, a0, 3
117-
; RV64I-NEXT: sub a0, a0, a2
116+
; RV64I-NEXT: slli a0, a0, 2
117+
; RV64I-NEXT: add a0, a0, a2
118118
; RV64I-NEXT: or a0, a0, a1
119119
; RV64I-NEXT: ret
120120
;
@@ -151,8 +151,8 @@ define i64 @addmul12(i64 %a, i64 %b) {
151151
; RV64I-LABEL: addmul12:
152152
; RV64I: # %bb.0:
153153
; RV64I-NEXT: slli a2, a0, 2
154-
; RV64I-NEXT: slli a0, a0, 4
155-
; RV64I-NEXT: sub a0, a0, a2
154+
; RV64I-NEXT: slli a0, a0, 3
155+
; RV64I-NEXT: add a0, a0, a2
156156
; RV64I-NEXT: add a0, a0, a1
157157
; RV64I-NEXT: ret
158158
;
@@ -227,8 +227,8 @@ define i64 @addmul24(i64 %a, i64 %b) {
227227
; RV64I-LABEL: addmul24:
228228
; RV64I: # %bb.0:
229229
; RV64I-NEXT: slli a2, a0, 3
230-
; RV64I-NEXT: slli a0, a0, 5
231-
; RV64I-NEXT: sub a0, a0, a2
230+
; RV64I-NEXT: slli a0, a0, 4
231+
; RV64I-NEXT: add a0, a0, a2
232232
; RV64I-NEXT: add a0, a0, a1
233233
; RV64I-NEXT: ret
234234
;
@@ -527,8 +527,8 @@ define i64 @mul96(i64 %a) {
527527
; RV64I-LABEL: mul96:
528528
; RV64I: # %bb.0:
529529
; RV64I-NEXT: slli a1, a0, 5
530-
; RV64I-NEXT: slli a0, a0, 7
531-
; RV64I-NEXT: sub a0, a0, a1
530+
; RV64I-NEXT: slli a0, a0, 6
531+
; RV64I-NEXT: add a0, a0, a1
532532
; RV64I-NEXT: ret
533533
;
534534
; RV64XTHEADBA-LABEL: mul96:
@@ -990,8 +990,8 @@ define signext i32 @mulw192(i32 signext %a) {
990990
; RV64I-LABEL: mulw192:
991991
; RV64I: # %bb.0:
992992
; RV64I-NEXT: slli a1, a0, 6
993-
; RV64I-NEXT: slli a0, a0, 8
994-
; RV64I-NEXT: subw a0, a0, a1
993+
; RV64I-NEXT: slli a0, a0, 7
994+
; RV64I-NEXT: addw a0, a0, a1
995995
; RV64I-NEXT: ret
996996
;
997997
; RV64XTHEADBA-LABEL: mulw192:

0 commit comments

Comments
 (0)