Skip to content

Commit 1dd94a2

Browse files
authored
[RISCV] Add helper method for shift-and-add extensions (#158638)
Not NFC: routing these checks through a single helper makes them consistent, which enables some shift-and-add cases for XAndesPerf and XTHeadBa that were previously missed.
1 parent 5b7f928 commit 1dd94a2

File tree

7 files changed

+132
-95
lines changed

7 files changed

+132
-95
lines changed

llvm/lib/Target/RISCV/RISCVISelDAGToDAG.cpp

Lines changed: 1 addition & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -3204,9 +3204,7 @@ static bool isWorthFoldingIntoRegRegScale(const RISCVSubtarget &Subtarget,
32043204
// If we have a SHXADD instruction, prefer that over reassociating an ADDI.
32053205
assert(Shift.getOpcode() == ISD::SHL);
32063206
unsigned ShiftAmt = Shift.getConstantOperandVal(1);
3207-
if ((ShiftAmt <= 3 &&
3208-
(Subtarget.hasStdExtZba() || Subtarget.hasVendorXTHeadBa())) ||
3209-
(ShiftAmt >= 4 && ShiftAmt <= 7 && Subtarget.hasVendorXqciac()))
3207+
if (Subtarget.hasShlAdd(ShiftAmt))
32103208
return false;
32113209

32123210
// All users of the ADDI should be load/store.

llvm/lib/Target/RISCV/RISCVISelLowering.cpp

Lines changed: 8 additions & 29 deletions
Original file line numberDiff line numberDiff line change
@@ -15349,11 +15349,9 @@ static SDValue combineBinOpToReduce(SDNode *N, SelectionDAG &DAG,
1534915349
// (SLLI (QC.SHLADD x, y, c1 - c0), c0), if 4 <= (c1-c0) <=31.
1535015350
static SDValue transformAddShlImm(SDNode *N, SelectionDAG &DAG,
1535115351
const RISCVSubtarget &Subtarget) {
15352-
const bool HasStdExtZba = Subtarget.hasStdExtZba();
15353-
const bool HasVendorXAndesPerf = Subtarget.hasVendorXAndesPerf();
15354-
const bool HasVendorXqciac = Subtarget.hasVendorXqciac();
15355-
// Perform this optimization only in the zba/xandesperf/xqciac extension.
15356-
if (!HasStdExtZba && !HasVendorXAndesPerf && !HasVendorXqciac)
15352+
// Perform this optimization only in the zba/xandesperf/xqciac/xtheadba
15353+
// extension.
15354+
if (!Subtarget.hasShlAdd(3))
1535715355
return SDValue();
1535815356

1535915357
// Skip for vector types and larger types.
@@ -15379,16 +15377,7 @@ static SDValue transformAddShlImm(SDNode *N, SelectionDAG &DAG,
1537915377
return SDValue();
1538015378

1538115379
int64_t Diff = std::abs(C0 - C1);
15382-
bool IsShXaddDiff = Diff == 1 || Diff == 2 || Diff == 3;
15383-
bool HasShXadd = HasStdExtZba || HasVendorXAndesPerf;
15384-
15385-
// Skip if SH1ADD/SH2ADD/SH3ADD are not applicable.
15386-
if ((!IsShXaddDiff && HasShXadd && !HasVendorXqciac) ||
15387-
(IsShXaddDiff && !HasShXadd && HasVendorXqciac))
15388-
return SDValue();
15389-
15390-
// Skip if QC_SHLADD is not applicable.
15391-
if (Diff == 0 || Diff > 31)
15380+
if (!Subtarget.hasShlAdd(Diff))
1539215381
return SDValue();
1539315382

1539415383
// Build nodes.
@@ -15445,7 +15434,7 @@ static SDValue combineShlAddIAddImpl(SDNode *N, SDValue AddI, SDValue Other,
1544515434
static SDValue combineShlAddIAdd(SDNode *N, SelectionDAG &DAG,
1544615435
const RISCVSubtarget &Subtarget) {
1544715436
// Perform this optimization only in the zba extension.
15448-
if (!ReassocShlAddiAdd || !Subtarget.hasStdExtZba())
15437+
if (!ReassocShlAddiAdd || !Subtarget.hasShlAdd(3))
1544915438
return SDValue();
1545015439

1545115440
// Skip for vector types and larger types.
@@ -16375,17 +16364,13 @@ static SDValue expandMul(SDNode *N, SelectionDAG &DAG,
1637516364
if (Subtarget.hasVendorXqciac() && isInt<12>(CNode->getSExtValue()))
1637616365
return SDValue();
1637716366

16378-
const bool HasShlAdd = Subtarget.hasStdExtZba() ||
16379-
Subtarget.hasVendorXTHeadBa() ||
16380-
Subtarget.hasVendorXAndesPerf();
16381-
1638216367
// WARNING: The code below is knowingly incorrect with regards to undef semantics.
1638316368
// We're adding additional uses of X here, and in principle, we should be freezing
1638416369
// X before doing so. However, adding freeze here causes real regressions, and no
1638516370
// other target properly freezes X in these cases either.
1638616371
SDValue X = N->getOperand(0);
1638716372

16388-
if (HasShlAdd) {
16373+
if (Subtarget.hasShlAdd(3)) {
1638916374
for (uint64_t Divisor : {3, 5, 9}) {
1639016375
if (MulAmt % Divisor != 0)
1639116376
continue;
@@ -21333,14 +21318,8 @@ bool RISCVTargetLowering::isDesirableToCommuteWithShift(
2133321318
auto *C1 = dyn_cast<ConstantSDNode>(N0->getOperand(1));
2133421319
auto *C2 = dyn_cast<ConstantSDNode>(N->getOperand(1));
2133521320

21336-
bool IsShXAdd =
21337-
(Subtarget.hasStdExtZba() || Subtarget.hasVendorXAndesPerf()) && C2 &&
21338-
C2->getZExtValue() >= 1 && C2->getZExtValue() <= 3;
21339-
bool IsQCShlAdd = Subtarget.hasVendorXqciac() && C2 &&
21340-
C2->getZExtValue() >= 4 && C2->getZExtValue() <= 31;
21341-
2134221321
// Bail if we might break a sh{1,2,3}add/qc.shladd pattern.
21343-
if ((IsShXAdd || IsQCShlAdd) && N->hasOneUse() &&
21322+
if (C2 && Subtarget.hasShlAdd(C2->getZExtValue()) && N->hasOneUse() &&
2134421323
N->user_begin()->getOpcode() == ISD::ADD &&
2134521324
!isUsedByLdSt(*N->user_begin(), nullptr) &&
2134621325
!isa<ConstantSDNode>(N->user_begin()->getOperand(1)))
@@ -24398,7 +24377,7 @@ bool RISCVTargetLowering::decomposeMulByConstant(LLVMContext &Context, EVT VT,
2439824377
return true;
2439924378

2440024379
// Optimize the MUL to (SH*ADD x, (SLLI x, bits)) if Imm is not simm12.
24401-
if (Subtarget.hasStdExtZba() && !Imm.isSignedIntN(12) &&
24380+
if (Subtarget.hasShlAdd(3) && !Imm.isSignedIntN(12) &&
2440224381
((Imm - 2).isPowerOf2() || (Imm - 4).isPowerOf2() ||
2440324382
(Imm - 8).isPowerOf2()))
2440424383
return true;

llvm/lib/Target/RISCV/RISCVInstrInfo.cpp

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -4492,7 +4492,7 @@ void RISCVInstrInfo::mulImm(MachineFunction &MF, MachineBasicBlock &MBB,
44924492
.addReg(DestReg, RegState::Kill)
44934493
.addImm(ShiftAmount)
44944494
.setMIFlag(Flag);
4495-
} else if (STI.hasStdExtZba() &&
4495+
} else if (STI.hasShlAdd(3) &&
44964496
((Amount % 3 == 0 && isPowerOf2_64(Amount / 3)) ||
44974497
(Amount % 5 == 0 && isPowerOf2_64(Amount / 5)) ||
44984498
(Amount % 9 == 0 && isPowerOf2_64(Amount / 9)))) {

llvm/lib/Target/RISCV/RISCVSubtarget.h

Lines changed: 8 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -209,6 +209,14 @@ class RISCVSubtarget : public RISCVGenSubtargetInfo {
209209
hasShortForwardBranchOpt();
210210
}
211211

212+
bool hasShlAdd(int64_t ShAmt) const {
213+
if (ShAmt <= 0)
214+
return false;
215+
if (ShAmt <= 3)
216+
return HasStdExtZba || HasVendorXAndesPerf || HasVendorXTHeadBa;
217+
return ShAmt <= 31 && HasVendorXqciac;
218+
}
219+
212220
bool is64Bit() const { return IsRV64; }
213221
MVT getXLenVT() const {
214222
return is64Bit() ? MVT::i64 : MVT::i32;

llvm/test/CodeGen/RISCV/rv32xtheadba.ll

Lines changed: 36 additions & 18 deletions
Original file line numberDiff line numberDiff line change
@@ -656,38 +656,56 @@ define i32 @add8192(i32 %a) {
656656
}
657657

658658
define i32 @addshl_5_6(i32 %a, i32 %b) {
659-
; CHECK-LABEL: addshl_5_6:
660-
; CHECK: # %bb.0:
661-
; CHECK-NEXT: slli a0, a0, 5
662-
; CHECK-NEXT: slli a1, a1, 6
663-
; CHECK-NEXT: add a0, a0, a1
664-
; CHECK-NEXT: ret
659+
; RV32I-LABEL: addshl_5_6:
660+
; RV32I: # %bb.0:
661+
; RV32I-NEXT: slli a0, a0, 5
662+
; RV32I-NEXT: slli a1, a1, 6
663+
; RV32I-NEXT: add a0, a0, a1
664+
; RV32I-NEXT: ret
665+
;
666+
; RV32XTHEADBA-LABEL: addshl_5_6:
667+
; RV32XTHEADBA: # %bb.0:
668+
; RV32XTHEADBA-NEXT: th.addsl a0, a0, a1, 1
669+
; RV32XTHEADBA-NEXT: slli a0, a0, 5
670+
; RV32XTHEADBA-NEXT: ret
665671
%c = shl i32 %a, 5
666672
%d = shl i32 %b, 6
667673
%e = add i32 %c, %d
668674
ret i32 %e
669675
}
670676

671677
define i32 @addshl_5_7(i32 %a, i32 %b) {
672-
; CHECK-LABEL: addshl_5_7:
673-
; CHECK: # %bb.0:
674-
; CHECK-NEXT: slli a0, a0, 5
675-
; CHECK-NEXT: slli a1, a1, 7
676-
; CHECK-NEXT: add a0, a0, a1
677-
; CHECK-NEXT: ret
678+
; RV32I-LABEL: addshl_5_7:
679+
; RV32I: # %bb.0:
680+
; RV32I-NEXT: slli a0, a0, 5
681+
; RV32I-NEXT: slli a1, a1, 7
682+
; RV32I-NEXT: add a0, a0, a1
683+
; RV32I-NEXT: ret
684+
;
685+
; RV32XTHEADBA-LABEL: addshl_5_7:
686+
; RV32XTHEADBA: # %bb.0:
687+
; RV32XTHEADBA-NEXT: th.addsl a0, a0, a1, 2
688+
; RV32XTHEADBA-NEXT: slli a0, a0, 5
689+
; RV32XTHEADBA-NEXT: ret
678690
%c = shl i32 %a, 5
679691
%d = shl i32 %b, 7
680692
%e = add i32 %c, %d
681693
ret i32 %e
682694
}
683695

684696
define i32 @addshl_5_8(i32 %a, i32 %b) {
685-
; CHECK-LABEL: addshl_5_8:
686-
; CHECK: # %bb.0:
687-
; CHECK-NEXT: slli a0, a0, 5
688-
; CHECK-NEXT: slli a1, a1, 8
689-
; CHECK-NEXT: add a0, a0, a1
690-
; CHECK-NEXT: ret
697+
; RV32I-LABEL: addshl_5_8:
698+
; RV32I: # %bb.0:
699+
; RV32I-NEXT: slli a0, a0, 5
700+
; RV32I-NEXT: slli a1, a1, 8
701+
; RV32I-NEXT: add a0, a0, a1
702+
; RV32I-NEXT: ret
703+
;
704+
; RV32XTHEADBA-LABEL: addshl_5_8:
705+
; RV32XTHEADBA: # %bb.0:
706+
; RV32XTHEADBA-NEXT: th.addsl a0, a0, a1, 3
707+
; RV32XTHEADBA-NEXT: slli a0, a0, 5
708+
; RV32XTHEADBA-NEXT: ret
691709
%c = shl i32 %a, 5
692710
%d = shl i32 %b, 8
693711
%e = add i32 %c, %d

llvm/test/CodeGen/RISCV/rv64xtheadba.ll

Lines changed: 76 additions & 42 deletions
Original file line numberDiff line numberDiff line change
@@ -1104,77 +1104,113 @@ define i64 @add8192(i64 %a) {
11041104
}
11051105

11061106
define signext i32 @addshl32_5_6(i32 signext %a, i32 signext %b) {
1107-
; CHECK-LABEL: addshl32_5_6:
1108-
; CHECK: # %bb.0:
1109-
; CHECK-NEXT: slli a0, a0, 5
1110-
; CHECK-NEXT: slli a1, a1, 6
1111-
; CHECK-NEXT: addw a0, a0, a1
1112-
; CHECK-NEXT: ret
1107+
; RV64I-LABEL: addshl32_5_6:
1108+
; RV64I: # %bb.0:
1109+
; RV64I-NEXT: slli a0, a0, 5
1110+
; RV64I-NEXT: slli a1, a1, 6
1111+
; RV64I-NEXT: addw a0, a0, a1
1112+
; RV64I-NEXT: ret
1113+
;
1114+
; RV64XTHEADBA-LABEL: addshl32_5_6:
1115+
; RV64XTHEADBA: # %bb.0:
1116+
; RV64XTHEADBA-NEXT: th.addsl a0, a0, a1, 1
1117+
; RV64XTHEADBA-NEXT: slliw a0, a0, 5
1118+
; RV64XTHEADBA-NEXT: ret
11131119
%c = shl i32 %a, 5
11141120
%d = shl i32 %b, 6
11151121
%e = add i32 %c, %d
11161122
ret i32 %e
11171123
}
11181124

11191125
define i64 @addshl64_5_6(i64 %a, i64 %b) {
1120-
; CHECK-LABEL: addshl64_5_6:
1121-
; CHECK: # %bb.0:
1122-
; CHECK-NEXT: slli a0, a0, 5
1123-
; CHECK-NEXT: slli a1, a1, 6
1124-
; CHECK-NEXT: add a0, a0, a1
1125-
; CHECK-NEXT: ret
1126+
; RV64I-LABEL: addshl64_5_6:
1127+
; RV64I: # %bb.0:
1128+
; RV64I-NEXT: slli a0, a0, 5
1129+
; RV64I-NEXT: slli a1, a1, 6
1130+
; RV64I-NEXT: add a0, a0, a1
1131+
; RV64I-NEXT: ret
1132+
;
1133+
; RV64XTHEADBA-LABEL: addshl64_5_6:
1134+
; RV64XTHEADBA: # %bb.0:
1135+
; RV64XTHEADBA-NEXT: th.addsl a0, a0, a1, 1
1136+
; RV64XTHEADBA-NEXT: slli a0, a0, 5
1137+
; RV64XTHEADBA-NEXT: ret
11261138
%c = shl i64 %a, 5
11271139
%d = shl i64 %b, 6
11281140
%e = add i64 %c, %d
11291141
ret i64 %e
11301142
}
11311143

11321144
define signext i32 @addshl32_5_7(i32 signext %a, i32 signext %b) {
1133-
; CHECK-LABEL: addshl32_5_7:
1134-
; CHECK: # %bb.0:
1135-
; CHECK-NEXT: slli a0, a0, 5
1136-
; CHECK-NEXT: slli a1, a1, 7
1137-
; CHECK-NEXT: addw a0, a0, a1
1138-
; CHECK-NEXT: ret
1145+
; RV64I-LABEL: addshl32_5_7:
1146+
; RV64I: # %bb.0:
1147+
; RV64I-NEXT: slli a0, a0, 5
1148+
; RV64I-NEXT: slli a1, a1, 7
1149+
; RV64I-NEXT: addw a0, a0, a1
1150+
; RV64I-NEXT: ret
1151+
;
1152+
; RV64XTHEADBA-LABEL: addshl32_5_7:
1153+
; RV64XTHEADBA: # %bb.0:
1154+
; RV64XTHEADBA-NEXT: th.addsl a0, a0, a1, 2
1155+
; RV64XTHEADBA-NEXT: slliw a0, a0, 5
1156+
; RV64XTHEADBA-NEXT: ret
11391157
%c = shl i32 %a, 5
11401158
%d = shl i32 %b, 7
11411159
%e = add i32 %c, %d
11421160
ret i32 %e
11431161
}
11441162

11451163
define i64 @addshl64_5_7(i64 %a, i64 %b) {
1146-
; CHECK-LABEL: addshl64_5_7:
1147-
; CHECK: # %bb.0:
1148-
; CHECK-NEXT: slli a0, a0, 5
1149-
; CHECK-NEXT: slli a1, a1, 7
1150-
; CHECK-NEXT: add a0, a0, a1
1151-
; CHECK-NEXT: ret
1164+
; RV64I-LABEL: addshl64_5_7:
1165+
; RV64I: # %bb.0:
1166+
; RV64I-NEXT: slli a0, a0, 5
1167+
; RV64I-NEXT: slli a1, a1, 7
1168+
; RV64I-NEXT: add a0, a0, a1
1169+
; RV64I-NEXT: ret
1170+
;
1171+
; RV64XTHEADBA-LABEL: addshl64_5_7:
1172+
; RV64XTHEADBA: # %bb.0:
1173+
; RV64XTHEADBA-NEXT: th.addsl a0, a0, a1, 2
1174+
; RV64XTHEADBA-NEXT: slli a0, a0, 5
1175+
; RV64XTHEADBA-NEXT: ret
11521176
%c = shl i64 %a, 5
11531177
%d = shl i64 %b, 7
11541178
%e = add i64 %c, %d
11551179
ret i64 %e
11561180
}
11571181

11581182
define signext i32 @addshl32_5_8(i32 signext %a, i32 signext %b) {
1159-
; CHECK-LABEL: addshl32_5_8:
1160-
; CHECK: # %bb.0:
1161-
; CHECK-NEXT: slli a0, a0, 5
1162-
; CHECK-NEXT: slli a1, a1, 8
1163-
; CHECK-NEXT: addw a0, a0, a1
1164-
; CHECK-NEXT: ret
1183+
; RV64I-LABEL: addshl32_5_8:
1184+
; RV64I: # %bb.0:
1185+
; RV64I-NEXT: slli a0, a0, 5
1186+
; RV64I-NEXT: slli a1, a1, 8
1187+
; RV64I-NEXT: addw a0, a0, a1
1188+
; RV64I-NEXT: ret
1189+
;
1190+
; RV64XTHEADBA-LABEL: addshl32_5_8:
1191+
; RV64XTHEADBA: # %bb.0:
1192+
; RV64XTHEADBA-NEXT: th.addsl a0, a0, a1, 3
1193+
; RV64XTHEADBA-NEXT: slliw a0, a0, 5
1194+
; RV64XTHEADBA-NEXT: ret
11651195
%c = shl i32 %a, 5
11661196
%d = shl i32 %b, 8
11671197
%e = add i32 %c, %d
11681198
ret i32 %e
11691199
}
11701200

11711201
define i64 @addshl64_5_8(i64 %a, i64 %b) {
1172-
; CHECK-LABEL: addshl64_5_8:
1173-
; CHECK: # %bb.0:
1174-
; CHECK-NEXT: slli a0, a0, 5
1175-
; CHECK-NEXT: slli a1, a1, 8
1176-
; CHECK-NEXT: add a0, a0, a1
1177-
; CHECK-NEXT: ret
1202+
; RV64I-LABEL: addshl64_5_8:
1203+
; RV64I: # %bb.0:
1204+
; RV64I-NEXT: slli a0, a0, 5
1205+
; RV64I-NEXT: slli a1, a1, 8
1206+
; RV64I-NEXT: add a0, a0, a1
1207+
; RV64I-NEXT: ret
1208+
;
1209+
; RV64XTHEADBA-LABEL: addshl64_5_8:
1210+
; RV64XTHEADBA: # %bb.0:
1211+
; RV64XTHEADBA-NEXT: th.addsl a0, a0, a1, 3
1212+
; RV64XTHEADBA-NEXT: slli a0, a0, 5
1213+
; RV64XTHEADBA-NEXT: ret
11781214
%c = shl i64 %a, 5
11791215
%d = shl i64 %b, 8
11801216
%e = add i64 %c, %d
@@ -1192,9 +1228,8 @@ define i64 @sh6_sh3_add1(i64 noundef %x, i64 noundef %y, i64 noundef %z) {
11921228
;
11931229
; RV64XTHEADBA-LABEL: sh6_sh3_add1:
11941230
; RV64XTHEADBA: # %bb.0: # %entry
1195-
; RV64XTHEADBA-NEXT: slli a1, a1, 6
1196-
; RV64XTHEADBA-NEXT: th.addsl a1, a1, a2, 3
1197-
; RV64XTHEADBA-NEXT: add a0, a1, a0
1231+
; RV64XTHEADBA-NEXT: th.addsl a1, a2, a1, 3
1232+
; RV64XTHEADBA-NEXT: th.addsl a0, a0, a1, 3
11981233
; RV64XTHEADBA-NEXT: ret
11991234
entry:
12001235
%shl = shl i64 %z, 3
@@ -1238,9 +1273,8 @@ define i64 @sh6_sh3_add3(i64 noundef %x, i64 noundef %y, i64 noundef %z) {
12381273
;
12391274
; RV64XTHEADBA-LABEL: sh6_sh3_add3:
12401275
; RV64XTHEADBA: # %bb.0: # %entry
1241-
; RV64XTHEADBA-NEXT: slli a1, a1, 6
1242-
; RV64XTHEADBA-NEXT: th.addsl a1, a1, a2, 3
1243-
; RV64XTHEADBA-NEXT: add a0, a0, a1
1276+
; RV64XTHEADBA-NEXT: th.addsl a1, a2, a1, 3
1277+
; RV64XTHEADBA-NEXT: th.addsl a0, a0, a1, 3
12441278
; RV64XTHEADBA-NEXT: ret
12451279
entry:
12461280
%shl = shl i64 %z, 3

llvm/test/CodeGen/RISCV/xqciac.ll

Lines changed: 2 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -361,8 +361,8 @@ define dso_local i32 @shladdc1c2(i32 %a, i32 %b) local_unnamed_addr #0 {
361361
;
362362
; RV32IMXQCIAC-LABEL: shladdc1c2:
363363
; RV32IMXQCIAC: # %bb.0: # %entry
364-
; RV32IMXQCIAC-NEXT: qc.shladd a0, a0, a1, 5
365-
; RV32IMXQCIAC-NEXT: slli a0, a0, 26
364+
; RV32IMXQCIAC-NEXT: slli a1, a1, 26
365+
; RV32IMXQCIAC-NEXT: qc.shladd a0, a0, a1, 31
366366
; RV32IMXQCIAC-NEXT: ret
367367
;
368368
; RV32IZBAMXQCIAC-LABEL: shladdc1c2:

0 commit comments

Comments
 (0)