Skip to content

Commit e6f369f

Browse files
committed
[RISCV] Prefer SUB if (X << C + 2) is free
1 parent 93664d2 commit e6f369f

File tree

4 files changed

+60
-48
lines changed

4 files changed

+60
-48
lines changed

llvm/lib/Target/RISCV/RISCVISelLowering.cpp

Lines changed: 17 additions & 4 deletions
Original file line number | Diff line number | Diff line change
@@ -16475,22 +16475,35 @@ static SDValue expandMulToNAFSequence(SDNode *N, SelectionDAG &DAG,
1647516475
static SDValue expandMulToAddOrSubOfShl(SDNode *N, SelectionDAG &DAG,
1647616476
uint64_t MulAmt) {
1647716477
uint64_t MulAmtLowBit = MulAmt & (-MulAmt);
16478+
SDValue X = N->getOperand(0);
1647816479
ISD::NodeType Op;
1647916480
uint64_t ShiftAmt1;
16480-
if (isPowerOf2_64(MulAmt - MulAmtLowBit)) {
16481+
bool CanSub = isPowerOf2_64(MulAmt + MulAmtLowBit);
16482+
auto PreferSub = [X, MulAmtLowBit]() {
16483+
// For MulAmt == 3 << M both (X << M + 2) - (X << M)
16484+
// and (X << M + 1) + (X << M) are valid expansions.
16485+
// Prefer SUB if we can get (X << M + 2) for free,
16486+
// because X is exact (Y >> M + 2).
16487+
uint64_t ShAmt = Log2_64(MulAmtLowBit) + 2;
16488+
using namespace SDPatternMatch;
16489+
return sd_match(X, m_AnyOf(m_Sra(m_Value(), m_SpecificInt(ShAmt)),
16490+
m_Srl(m_Value(), m_SpecificInt(ShAmt)))) &&
16491+
X->getFlags().hasExact();
16492+
};
16493+
if (isPowerOf2_64(MulAmt - MulAmtLowBit) && !(CanSub && PreferSub())) {
1648116494
Op = ISD::ADD;
1648216495
ShiftAmt1 = MulAmt - MulAmtLowBit;
16483-
} else if (isPowerOf2_64(MulAmt + MulAmtLowBit)) {
16496+
} else if (CanSub) {
1648416497
Op = ISD::SUB;
1648516498
ShiftAmt1 = MulAmt + MulAmtLowBit;
1648616499
} else {
1648716500
return SDValue();
1648816501
}
1648916502
EVT VT = N->getValueType(0);
1649016503
SDLoc DL(N);
16491-
SDValue Shift1 = DAG.getNode(ISD::SHL, DL, VT, N->getOperand(0),
16504+
SDValue Shift1 = DAG.getNode(ISD::SHL, DL, VT, X,
1649216505
DAG.getConstant(Log2_64(ShiftAmt1), DL, VT));
16493-
SDValue Shift2 = DAG.getNode(ISD::SHL, DL, VT, N->getOperand(0),
16506+
SDValue Shift2 = DAG.getNode(ISD::SHL, DL, VT, X,
1649416507
DAG.getConstant(Log2_64(MulAmtLowBit), DL, VT));
1649516508
return DAG.getNode(Op, DL, VT, Shift1, Shift2);
1649616509
}

llvm/test/CodeGen/RISCV/rvv/extract-subvector.ll

Lines changed: 2 additions & 4 deletions
Original file line number | Diff line number | Diff line change
@@ -291,8 +291,7 @@ define <vscale x 2 x i8> @extract_nxv32i8_nxv2i8_6(<vscale x 32 x i8> %vec) {
291291
; CHECK: # %bb.0:
292292
; CHECK-NEXT: csrr a0, vlenb
293293
; CHECK-NEXT: srli a1, a0, 2
294-
; CHECK-NEXT: srli a0, a0, 1
295-
; CHECK-NEXT: add a0, a0, a1
294+
; CHECK-NEXT: sub a0, a0, a1
296295
; CHECK-NEXT: vsetvli a1, zero, e8, m1, ta, ma
297296
; CHECK-NEXT: vslidedown.vx v8, v8, a0
298297
; CHECK-NEXT: ret
@@ -315,8 +314,7 @@ define <vscale x 2 x i8> @extract_nxv32i8_nxv2i8_22(<vscale x 32 x i8> %vec) {
315314
; CHECK: # %bb.0:
316315
; CHECK-NEXT: csrr a0, vlenb
317316
; CHECK-NEXT: srli a1, a0, 2
318-
; CHECK-NEXT: srli a0, a0, 1
319-
; CHECK-NEXT: add a0, a0, a1
317+
; CHECK-NEXT: sub a0, a0, a1
320318
; CHECK-NEXT: vsetvli a1, zero, e8, m1, ta, ma
321319
; CHECK-NEXT: vslidedown.vx v8, v10, a0
322320
; CHECK-NEXT: ret

llvm/test/CodeGen/RISCV/rvv/vector-deinterleave.ll

Lines changed: 39 additions & 36 deletions
Original file line number | Diff line number | Diff line change
@@ -475,15 +475,16 @@ define {<vscale x 16 x i1>, <vscale x 16 x i1>, <vscale x 16 x i1>, <vscale x 16
475475
; CHECK-NEXT: csrr a0, vlenb
476476
; CHECK-NEXT: slli a0, a0, 3
477477
; CHECK-NEXT: sub sp, sp, a0
478-
; CHECK-NEXT: vsetvli a0, zero, e8, m1, ta, ma
478+
; CHECK-NEXT: vsetvli a0, zero, e8, m2, ta, ma
479479
; CHECK-NEXT: vmv1r.v v8, v0
480480
; CHECK-NEXT: csrr a0, vlenb
481+
; CHECK-NEXT: vmv.v.i v10, 0
481482
; CHECK-NEXT: srli a1, a0, 2
482-
; CHECK-NEXT: srli a0, a0, 1
483-
; CHECK-NEXT: add a2, a0, a1
483+
; CHECK-NEXT: sub a2, a0, a1
484+
; CHECK-NEXT: vsetvli a3, zero, e8, m1, ta, ma
484485
; CHECK-NEXT: vslidedown.vx v0, v0, a2
486+
; CHECK-NEXT: srli a0, a0, 1
485487
; CHECK-NEXT: vsetvli a2, zero, e8, m2, ta, ma
486-
; CHECK-NEXT: vmv.v.i v10, 0
487488
; CHECK-NEXT: vmerge.vim v22, v10, 1, v0
488489
; CHECK-NEXT: vsetvli a2, zero, e8, m1, ta, ma
489490
; CHECK-NEXT: vslidedown.vx v0, v8, a0
@@ -606,12 +607,12 @@ define {<vscale x 16 x i1>, <vscale x 16 x i1>, <vscale x 16 x i1>, <vscale x 16
606607
; CHECK-NEXT: srli a1, a0, 2
607608
; CHECK-NEXT: vsetvli a2, zero, e8, m1, ta, ma
608609
; CHECK-NEXT: vslidedown.vx v0, v0, a1
609-
; CHECK-NEXT: srli a0, a0, 1
610-
; CHECK-NEXT: vsetvli a2, zero, e8, m2, ta, ma
610+
; CHECK-NEXT: srli a2, a0, 1
611+
; CHECK-NEXT: vsetvli a3, zero, e8, m2, ta, ma
611612
; CHECK-NEXT: vmerge.vim v18, v12, 1, v0
612-
; CHECK-NEXT: vsetvli a2, zero, e8, m1, ta, ma
613-
; CHECK-NEXT: vslidedown.vx v0, v9, a0
614-
; CHECK-NEXT: add a0, a0, a1
613+
; CHECK-NEXT: vsetvli a3, zero, e8, m1, ta, ma
614+
; CHECK-NEXT: vslidedown.vx v0, v9, a2
615+
; CHECK-NEXT: sub a0, a0, a1
615616
; CHECK-NEXT: vsetvli a1, zero, e8, m2, ta, ma
616617
; CHECK-NEXT: vmerge.vim v20, v12, 1, v0
617618
; CHECK-NEXT: vsetvli a1, zero, e8, m1, ta, ma
@@ -832,37 +833,37 @@ define {<vscale x 16 x i1>, <vscale x 16 x i1>, <vscale x 16 x i1>, <vscale x 16
832833
; CHECK-NEXT: vmv1r.v v9, v0
833834
; CHECK-NEXT: vmv.v.i v10, 0
834835
; CHECK-NEXT: csrr a0, vlenb
835-
; CHECK-NEXT: addi a1, sp, 16
836-
; CHECK-NEXT: vmerge.vim v24, v10, 1, v0
837-
; CHECK-NEXT: srli a2, a0, 2
838-
; CHECK-NEXT: vsetvli a3, zero, e8, m1, ta, ma
839-
; CHECK-NEXT: vslidedown.vx v0, v0, a2
840-
; CHECK-NEXT: srli a0, a0, 1
836+
; CHECK-NEXT: vmerge.vim v16, v10, 1, v0
837+
; CHECK-NEXT: srli a1, a0, 2
838+
; CHECK-NEXT: vsetvli a2, zero, e8, m1, ta, ma
839+
; CHECK-NEXT: vslidedown.vx v0, v0, a1
840+
; CHECK-NEXT: srli a2, a0, 1
841841
; CHECK-NEXT: vsetvli a3, zero, e8, m2, ta, ma
842-
; CHECK-NEXT: vmerge.vim v26, v10, 1, v0
842+
; CHECK-NEXT: vmerge.vim v18, v10, 1, v0
843843
; CHECK-NEXT: vsetvli a3, zero, e8, m1, ta, ma
844-
; CHECK-NEXT: vslidedown.vx v0, v9, a0
845-
; CHECK-NEXT: add a0, a0, a2
844+
; CHECK-NEXT: vslidedown.vx v0, v9, a2
845+
; CHECK-NEXT: addi a2, sp, 16
846+
; CHECK-NEXT: sub a0, a0, a1
846847
; CHECK-NEXT: vsetvli a3, zero, e8, m2, ta, ma
847-
; CHECK-NEXT: vmerge.vim v28, v10, 1, v0
848+
; CHECK-NEXT: vmerge.vim v20, v10, 1, v0
848849
; CHECK-NEXT: vmv1r.v v0, v8
849-
; CHECK-NEXT: vmerge.vim v18, v10, 1, v0
850+
; CHECK-NEXT: vmerge.vim v26, v10, 1, v0
850851
; CHECK-NEXT: vsetvli a3, zero, e8, m1, ta, ma
851852
; CHECK-NEXT: vslidedown.vx v0, v9, a0
852-
; CHECK-NEXT: vs8r.v v24, (a1)
853+
; CHECK-NEXT: vs8r.v v16, (a2)
853854
; CHECK-NEXT: vsetvli a0, zero, e8, m2, ta, ma
854-
; CHECK-NEXT: vmerge.vim v16, v10, 1, v0
855+
; CHECK-NEXT: vmerge.vim v24, v10, 1, v0
855856
; CHECK-NEXT: vsetvli a0, zero, e8, m1, ta, ma
856-
; CHECK-NEXT: vslidedown.vx v0, v8, a2
857+
; CHECK-NEXT: vslidedown.vx v0, v8, a1
857858
; CHECK-NEXT: csrr a0, vlenb
858859
; CHECK-NEXT: slli a0, a0, 3
859860
; CHECK-NEXT: add a0, sp, a0
860861
; CHECK-NEXT: addi a0, a0, 16
861-
; CHECK-NEXT: vsetvli a2, zero, e8, m2, ta, ma
862-
; CHECK-NEXT: vmerge.vim v20, v10, 1, v0
863-
; CHECK-NEXT: vs8r.v v16, (a0)
864-
; CHECK-NEXT: vsetvli a2, zero, e8, m1, ta, ma
865-
; CHECK-NEXT: vlseg6e8.v v16, (a1)
862+
; CHECK-NEXT: vsetvli a1, zero, e8, m2, ta, ma
863+
; CHECK-NEXT: vmerge.vim v28, v10, 1, v0
864+
; CHECK-NEXT: vs8r.v v24, (a0)
865+
; CHECK-NEXT: vsetvli a1, zero, e8, m1, ta, ma
866+
; CHECK-NEXT: vlseg6e8.v v16, (a2)
866867
; CHECK-NEXT: vlseg6e8.v v10, (a0)
867868
; CHECK-NEXT: vmv2r.v v8, v16
868869
; CHECK-NEXT: vmv2r.v v22, v18
@@ -1060,15 +1061,16 @@ define {<vscale x 16 x i1>, <vscale x 16 x i1>, <vscale x 16 x i1>, <vscale x 16
10601061
; CHECK-NEXT: csrr a0, vlenb
10611062
; CHECK-NEXT: slli a0, a0, 4
10621063
; CHECK-NEXT: sub sp, sp, a0
1063-
; CHECK-NEXT: vsetvli a0, zero, e8, m1, ta, ma
1064+
; CHECK-NEXT: vsetvli a0, zero, e8, m2, ta, ma
10641065
; CHECK-NEXT: vmv1r.v v9, v0
10651066
; CHECK-NEXT: csrr a0, vlenb
1067+
; CHECK-NEXT: vmv.v.i v12, 0
10661068
; CHECK-NEXT: srli a1, a0, 2
1067-
; CHECK-NEXT: srli a0, a0, 1
1068-
; CHECK-NEXT: add a2, a0, a1
1069+
; CHECK-NEXT: sub a2, a0, a1
1070+
; CHECK-NEXT: vsetvli a3, zero, e8, m1, ta, ma
10691071
; CHECK-NEXT: vslidedown.vx v0, v0, a2
1072+
; CHECK-NEXT: srli a0, a0, 1
10701073
; CHECK-NEXT: vsetvli a2, zero, e8, m2, ta, ma
1071-
; CHECK-NEXT: vmv.v.i v12, 0
10721074
; CHECK-NEXT: vmerge.vim v22, v12, 1, v0
10731075
; CHECK-NEXT: vsetvli a2, zero, e8, m1, ta, ma
10741076
; CHECK-NEXT: vslidedown.vx v0, v9, a0
@@ -1329,15 +1331,16 @@ define {<vscale x 16 x i1>, <vscale x 16 x i1>, <vscale x 16 x i1>, <vscale x 16
13291331
; CHECK-NEXT: csrr a0, vlenb
13301332
; CHECK-NEXT: slli a0, a0, 4
13311333
; CHECK-NEXT: sub sp, sp, a0
1332-
; CHECK-NEXT: vsetvli a0, zero, e8, m1, ta, ma
1334+
; CHECK-NEXT: vsetvli a0, zero, e8, m2, ta, ma
13331335
; CHECK-NEXT: vmv1r.v v9, v0
13341336
; CHECK-NEXT: csrr a0, vlenb
1337+
; CHECK-NEXT: vmv.v.i v10, 0
13351338
; CHECK-NEXT: srli a1, a0, 2
1336-
; CHECK-NEXT: srli a0, a0, 1
1337-
; CHECK-NEXT: add a2, a0, a1
1339+
; CHECK-NEXT: sub a2, a0, a1
1340+
; CHECK-NEXT: vsetvli a3, zero, e8, m1, ta, ma
13381341
; CHECK-NEXT: vslidedown.vx v0, v0, a2
1342+
; CHECK-NEXT: srli a0, a0, 1
13391343
; CHECK-NEXT: vsetvli a3, zero, e8, m2, ta, ma
1340-
; CHECK-NEXT: vmv.v.i v10, 0
13411344
; CHECK-NEXT: vmerge.vim v22, v10, 1, v0
13421345
; CHECK-NEXT: vsetvli a3, zero, e8, m1, ta, ma
13431346
; CHECK-NEXT: vslidedown.vx v0, v9, a0

llvm/test/CodeGen/RISCV/rvv/vreductions-fp-sdnode.ll

Lines changed: 2 additions & 4 deletions
Original file line number | Diff line number | Diff line change
@@ -907,8 +907,7 @@ define half @vreduce_ord_fadd_nxv6f16(<vscale x 6 x half> %v, half %s) {
907907
; CHECK: # %bb.0:
908908
; CHECK-NEXT: csrr a0, vlenb
909909
; CHECK-NEXT: srli a1, a0, 2
910-
; CHECK-NEXT: srli a0, a0, 1
911-
; CHECK-NEXT: add a0, a0, a1
910+
; CHECK-NEXT: sub a0, a0, a1
912911
; CHECK-NEXT: vsetivli zero, 1, e16, m1, ta, ma
913912
; CHECK-NEXT: vfmv.s.f v10, fa0
914913
; CHECK-NEXT: vsetvli zero, a0, e16, m2, ta, ma
@@ -982,8 +981,7 @@ define half @vreduce_fadd_nxv6f16(<vscale x 6 x half> %v, half %s) {
982981
; CHECK-NEXT: vsetivli zero, 1, e16, m1, ta, ma
983982
; CHECK-NEXT: vfmv.s.f v10, fa0
984983
; CHECK-NEXT: srli a1, a0, 2
985-
; CHECK-NEXT: srli a0, a0, 1
986-
; CHECK-NEXT: add a0, a0, a1
984+
; CHECK-NEXT: sub a0, a0, a1
987985
; CHECK-NEXT: lui a1, 1048568
988986
; CHECK-NEXT: vmv.s.x v11, a1
989987
; CHECK-NEXT: vsetvli zero, a0, e16, m2, ta, ma

0 commit comments

Comments
 (0)