Skip to content
Merged
Show file tree
Hide file tree
Changes from 1 commit
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
4 changes: 1 addition & 3 deletions llvm/lib/Target/RISCV/RISCVISelDAGToDAG.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -3204,9 +3204,7 @@ static bool isWorthFoldingIntoRegRegScale(const RISCVSubtarget &Subtarget,
// If we have a SHXADD instruction, prefer that over reassociating an ADDI.
assert(Shift.getOpcode() == ISD::SHL);
unsigned ShiftAmt = Shift.getConstantOperandVal(1);
if ((ShiftAmt <= 3 &&
(Subtarget.hasStdExtZba() || Subtarget.hasVendorXTHeadBa())) ||
(ShiftAmt >= 4 && ShiftAmt <= 7 && Subtarget.hasVendorXqciac()))
if (ShiftAmt <= 7 && Subtarget.hasShlAdd(ShiftAmt))
Copy link
Contributor Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

I don't know why this limit of 7 exists. Perhaps we can remove it?

Copy link
Collaborator

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

I think we can remove it.

Copy link
Contributor Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Removed.

Copy link
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

#150135 (comment) was Craig's explanation for why he added the 7 limit.

return false;

// All users of the ADDI should be load/store.
Expand Down
37 changes: 8 additions & 29 deletions llvm/lib/Target/RISCV/RISCVISelLowering.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -15349,11 +15349,9 @@ static SDValue combineBinOpToReduce(SDNode *N, SelectionDAG &DAG,
// (SLLI (QC.SHLADD x, y, c1 - c0), c0), if 4 <= (c1-c0) <=31.
static SDValue transformAddShlImm(SDNode *N, SelectionDAG &DAG,
const RISCVSubtarget &Subtarget) {
const bool HasStdExtZba = Subtarget.hasStdExtZba();
const bool HasVendorXAndesPerf = Subtarget.hasVendorXAndesPerf();
const bool HasVendorXqciac = Subtarget.hasVendorXqciac();
// Perform this optimization only in the zba/xandesperf/xqciac extension.
if (!HasStdExtZba && !HasVendorXAndesPerf && !HasVendorXqciac)
// Perform this optimization only in the zba/xandesperf/xqciac/xtheadba
// extension.
if (!Subtarget.hasShlAdd(3))
return SDValue();

// Skip for vector types and larger types.
Expand All @@ -15379,16 +15377,7 @@ static SDValue transformAddShlImm(SDNode *N, SelectionDAG &DAG,
return SDValue();

int64_t Diff = std::abs(C0 - C1);
bool IsShXaddDiff = Diff == 1 || Diff == 2 || Diff == 3;
bool HasShXadd = HasStdExtZba || HasVendorXAndesPerf;

// Skip if SH1ADD/SH2ADD/SH3ADD are not applicable.
if ((!IsShXaddDiff && HasShXadd && !HasVendorXqciac) ||
(IsShXaddDiff && !HasShXadd && HasVendorXqciac))
return SDValue();

// Skip if QC_SHLADD is not applicable.
if (Diff == 0 || Diff > 31)
if (!Subtarget.hasShlAdd(Diff))
return SDValue();

// Build nodes.
Expand Down Expand Up @@ -15445,7 +15434,7 @@ static SDValue combineShlAddIAddImpl(SDNode *N, SDValue AddI, SDValue Other,
static SDValue combineShlAddIAdd(SDNode *N, SelectionDAG &DAG,
const RISCVSubtarget &Subtarget) {
// Perform this optimization only in the zba extension.
if (!ReassocShlAddiAdd || !Subtarget.hasStdExtZba())
if (!ReassocShlAddiAdd || !Subtarget.hasShlAdd(3))
return SDValue();

// Skip for vector types and larger types.
Expand Down Expand Up @@ -16375,17 +16364,13 @@ static SDValue expandMul(SDNode *N, SelectionDAG &DAG,
if (Subtarget.hasVendorXqciac() && isInt<12>(CNode->getSExtValue()))
return SDValue();

const bool HasShlAdd = Subtarget.hasStdExtZba() ||
Subtarget.hasVendorXTHeadBa() ||
Subtarget.hasVendorXAndesPerf();

// WARNING: The code below is knowingly incorrect with regards to undef semantics.
// We're adding additional uses of X here, and in principle, we should be freezing
// X before doing so. However, adding freeze here causes real regressions, and no
// other target properly freezes X in these cases either.
SDValue X = N->getOperand(0);

if (HasShlAdd) {
if (Subtarget.hasShlAdd(3)) {
for (uint64_t Divisor : {3, 5, 9}) {
if (MulAmt % Divisor != 0)
continue;
Expand Down Expand Up @@ -21333,14 +21318,8 @@ bool RISCVTargetLowering::isDesirableToCommuteWithShift(
auto *C1 = dyn_cast<ConstantSDNode>(N0->getOperand(1));
auto *C2 = dyn_cast<ConstantSDNode>(N->getOperand(1));

bool IsShXAdd =
(Subtarget.hasStdExtZba() || Subtarget.hasVendorXAndesPerf()) && C2 &&
C2->getZExtValue() >= 1 && C2->getZExtValue() <= 3;
bool IsQCShlAdd = Subtarget.hasVendorXqciac() && C2 &&
C2->getZExtValue() >= 4 && C2->getZExtValue() <= 31;

// Bail if we might break a sh{1,2,3}add/qc.shladd pattern.
if ((IsShXAdd || IsQCShlAdd) && N->hasOneUse() &&
if (C2 && Subtarget.hasShlAdd(C2->getZExtValue()) && N->hasOneUse() &&
N->user_begin()->getOpcode() == ISD::ADD &&
!isUsedByLdSt(*N->user_begin(), nullptr) &&
!isa<ConstantSDNode>(N->user_begin()->getOperand(1)))
Expand Down Expand Up @@ -24398,7 +24377,7 @@ bool RISCVTargetLowering::decomposeMulByConstant(LLVMContext &Context, EVT VT,
return true;

// Optimize the MUL to (SH*ADD x, (SLLI x, bits)) if Imm is not simm12.
if (Subtarget.hasStdExtZba() && !Imm.isSignedIntN(12) &&
if (Subtarget.hasShlAdd(3) && !Imm.isSignedIntN(12) &&
((Imm - 2).isPowerOf2() || (Imm - 4).isPowerOf2() ||
(Imm - 8).isPowerOf2()))
return true;
Expand Down
2 changes: 1 addition & 1 deletion llvm/lib/Target/RISCV/RISCVInstrInfo.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -4492,7 +4492,7 @@ void RISCVInstrInfo::mulImm(MachineFunction &MF, MachineBasicBlock &MBB,
.addReg(DestReg, RegState::Kill)
.addImm(ShiftAmount)
.setMIFlag(Flag);
} else if (STI.hasStdExtZba() &&
} else if (STI.hasShlAdd(3) &&
((Amount % 3 == 0 && isPowerOf2_64(Amount / 3)) ||
(Amount % 5 == 0 && isPowerOf2_64(Amount / 5)) ||
(Amount % 9 == 0 && isPowerOf2_64(Amount / 9)))) {
Expand Down
8 changes: 8 additions & 0 deletions llvm/lib/Target/RISCV/RISCVSubtarget.h
Original file line number Diff line number Diff line change
Expand Up @@ -209,6 +209,14 @@ class RISCVSubtarget : public RISCVGenSubtargetInfo {
hasShortForwardBranchOpt();
}

/// Reports whether a shift-left-and-add with shift amount \p ShAmt is
/// supported by the enabled extensions.
///
/// \param ShAmt the shift amount to test; zero, negative values and values
///        above 31 are always rejected.
/// \returns true when \p ShAmt is in [1, 3] and any of Zba, XAndesPerf or
///          XTHeadBa is enabled, or when \p ShAmt is in [4, 31] and Xqciac is
///          enabled; false otherwise.
bool hasShlAdd(int64_t ShAmt) const {
  // Small shift amounts: any one of the three extension flags is sufficient.
  if (ShAmt >= 1 && ShAmt <= 3)
    return HasStdExtZba || HasVendorXAndesPerf || HasVendorXTHeadBa;

  // Larger shift amounts are only accepted with Xqciac. Note that Xqciac on
  // its own does not make the [1, 3] range above succeed.
  if (ShAmt >= 4 && ShAmt <= 31)
    return HasVendorXqciac;

  // Everything else (ShAmt <= 0 or ShAmt > 31) is never supported.
  return false;
}

bool is64Bit() const { return IsRV64; }
MVT getXLenVT() const {
return is64Bit() ? MVT::i64 : MVT::i32;
Expand Down
54 changes: 36 additions & 18 deletions llvm/test/CodeGen/RISCV/rv32xtheadba.ll
Original file line number Diff line number Diff line change
Expand Up @@ -656,38 +656,56 @@ define i32 @add8192(i32 %a) {
}

define i32 @addshl_5_6(i32 %a, i32 %b) {
; CHECK-LABEL: addshl_5_6:
; CHECK: # %bb.0:
; CHECK-NEXT: slli a0, a0, 5
; CHECK-NEXT: slli a1, a1, 6
; CHECK-NEXT: add a0, a0, a1
; CHECK-NEXT: ret
; RV32I-LABEL: addshl_5_6:
; RV32I: # %bb.0:
; RV32I-NEXT: slli a0, a0, 5
; RV32I-NEXT: slli a1, a1, 6
; RV32I-NEXT: add a0, a0, a1
; RV32I-NEXT: ret
;
; RV32XTHEADBA-LABEL: addshl_5_6:
; RV32XTHEADBA: # %bb.0:
; RV32XTHEADBA-NEXT: th.addsl a0, a0, a1, 1
; RV32XTHEADBA-NEXT: slli a0, a0, 5
; RV32XTHEADBA-NEXT: ret
%c = shl i32 %a, 5
%d = shl i32 %b, 6
%e = add i32 %c, %d
ret i32 %e
}

define i32 @addshl_5_7(i32 %a, i32 %b) {
; CHECK-LABEL: addshl_5_7:
; CHECK: # %bb.0:
; CHECK-NEXT: slli a0, a0, 5
; CHECK-NEXT: slli a1, a1, 7
; CHECK-NEXT: add a0, a0, a1
; CHECK-NEXT: ret
; RV32I-LABEL: addshl_5_7:
; RV32I: # %bb.0:
; RV32I-NEXT: slli a0, a0, 5
; RV32I-NEXT: slli a1, a1, 7
; RV32I-NEXT: add a0, a0, a1
; RV32I-NEXT: ret
;
; RV32XTHEADBA-LABEL: addshl_5_7:
; RV32XTHEADBA: # %bb.0:
; RV32XTHEADBA-NEXT: th.addsl a0, a0, a1, 2
; RV32XTHEADBA-NEXT: slli a0, a0, 5
; RV32XTHEADBA-NEXT: ret
%c = shl i32 %a, 5
%d = shl i32 %b, 7
%e = add i32 %c, %d
ret i32 %e
}

define i32 @addshl_5_8(i32 %a, i32 %b) {
; CHECK-LABEL: addshl_5_8:
; CHECK: # %bb.0:
; CHECK-NEXT: slli a0, a0, 5
; CHECK-NEXT: slli a1, a1, 8
; CHECK-NEXT: add a0, a0, a1
; CHECK-NEXT: ret
; RV32I-LABEL: addshl_5_8:
; RV32I: # %bb.0:
; RV32I-NEXT: slli a0, a0, 5
; RV32I-NEXT: slli a1, a1, 8
; RV32I-NEXT: add a0, a0, a1
; RV32I-NEXT: ret
;
; RV32XTHEADBA-LABEL: addshl_5_8:
; RV32XTHEADBA: # %bb.0:
; RV32XTHEADBA-NEXT: th.addsl a0, a0, a1, 3
; RV32XTHEADBA-NEXT: slli a0, a0, 5
; RV32XTHEADBA-NEXT: ret
%c = shl i32 %a, 5
%d = shl i32 %b, 8
%e = add i32 %c, %d
Expand Down
118 changes: 76 additions & 42 deletions llvm/test/CodeGen/RISCV/rv64xtheadba.ll
Original file line number Diff line number Diff line change
Expand Up @@ -1104,77 +1104,113 @@ define i64 @add8192(i64 %a) {
}

define signext i32 @addshl32_5_6(i32 signext %a, i32 signext %b) {
; CHECK-LABEL: addshl32_5_6:
; CHECK: # %bb.0:
; CHECK-NEXT: slli a0, a0, 5
; CHECK-NEXT: slli a1, a1, 6
; CHECK-NEXT: addw a0, a0, a1
; CHECK-NEXT: ret
; RV64I-LABEL: addshl32_5_6:
; RV64I: # %bb.0:
; RV64I-NEXT: slli a0, a0, 5
; RV64I-NEXT: slli a1, a1, 6
; RV64I-NEXT: addw a0, a0, a1
; RV64I-NEXT: ret
;
; RV64XTHEADBA-LABEL: addshl32_5_6:
; RV64XTHEADBA: # %bb.0:
; RV64XTHEADBA-NEXT: th.addsl a0, a0, a1, 1
; RV64XTHEADBA-NEXT: slliw a0, a0, 5
; RV64XTHEADBA-NEXT: ret
%c = shl i32 %a, 5
%d = shl i32 %b, 6
%e = add i32 %c, %d
ret i32 %e
}

define i64 @addshl64_5_6(i64 %a, i64 %b) {
; CHECK-LABEL: addshl64_5_6:
; CHECK: # %bb.0:
; CHECK-NEXT: slli a0, a0, 5
; CHECK-NEXT: slli a1, a1, 6
; CHECK-NEXT: add a0, a0, a1
; CHECK-NEXT: ret
; RV64I-LABEL: addshl64_5_6:
; RV64I: # %bb.0:
; RV64I-NEXT: slli a0, a0, 5
; RV64I-NEXT: slli a1, a1, 6
; RV64I-NEXT: add a0, a0, a1
; RV64I-NEXT: ret
;
; RV64XTHEADBA-LABEL: addshl64_5_6:
; RV64XTHEADBA: # %bb.0:
; RV64XTHEADBA-NEXT: th.addsl a0, a0, a1, 1
; RV64XTHEADBA-NEXT: slli a0, a0, 5
; RV64XTHEADBA-NEXT: ret
%c = shl i64 %a, 5
%d = shl i64 %b, 6
%e = add i64 %c, %d
ret i64 %e
}

define signext i32 @addshl32_5_7(i32 signext %a, i32 signext %b) {
; CHECK-LABEL: addshl32_5_7:
; CHECK: # %bb.0:
; CHECK-NEXT: slli a0, a0, 5
; CHECK-NEXT: slli a1, a1, 7
; CHECK-NEXT: addw a0, a0, a1
; CHECK-NEXT: ret
; RV64I-LABEL: addshl32_5_7:
; RV64I: # %bb.0:
; RV64I-NEXT: slli a0, a0, 5
; RV64I-NEXT: slli a1, a1, 7
; RV64I-NEXT: addw a0, a0, a1
; RV64I-NEXT: ret
;
; RV64XTHEADBA-LABEL: addshl32_5_7:
; RV64XTHEADBA: # %bb.0:
; RV64XTHEADBA-NEXT: th.addsl a0, a0, a1, 2
; RV64XTHEADBA-NEXT: slliw a0, a0, 5
; RV64XTHEADBA-NEXT: ret
%c = shl i32 %a, 5
%d = shl i32 %b, 7
%e = add i32 %c, %d
ret i32 %e
}

define i64 @addshl64_5_7(i64 %a, i64 %b) {
; CHECK-LABEL: addshl64_5_7:
; CHECK: # %bb.0:
; CHECK-NEXT: slli a0, a0, 5
; CHECK-NEXT: slli a1, a1, 7
; CHECK-NEXT: add a0, a0, a1
; CHECK-NEXT: ret
; RV64I-LABEL: addshl64_5_7:
; RV64I: # %bb.0:
; RV64I-NEXT: slli a0, a0, 5
; RV64I-NEXT: slli a1, a1, 7
; RV64I-NEXT: add a0, a0, a1
; RV64I-NEXT: ret
;
; RV64XTHEADBA-LABEL: addshl64_5_7:
; RV64XTHEADBA: # %bb.0:
; RV64XTHEADBA-NEXT: th.addsl a0, a0, a1, 2
; RV64XTHEADBA-NEXT: slli a0, a0, 5
; RV64XTHEADBA-NEXT: ret
%c = shl i64 %a, 5
%d = shl i64 %b, 7
%e = add i64 %c, %d
ret i64 %e
}

define signext i32 @addshl32_5_8(i32 signext %a, i32 signext %b) {
; CHECK-LABEL: addshl32_5_8:
; CHECK: # %bb.0:
; CHECK-NEXT: slli a0, a0, 5
; CHECK-NEXT: slli a1, a1, 8
; CHECK-NEXT: addw a0, a0, a1
; CHECK-NEXT: ret
; RV64I-LABEL: addshl32_5_8:
; RV64I: # %bb.0:
; RV64I-NEXT: slli a0, a0, 5
; RV64I-NEXT: slli a1, a1, 8
; RV64I-NEXT: addw a0, a0, a1
; RV64I-NEXT: ret
;
; RV64XTHEADBA-LABEL: addshl32_5_8:
; RV64XTHEADBA: # %bb.0:
; RV64XTHEADBA-NEXT: th.addsl a0, a0, a1, 3
; RV64XTHEADBA-NEXT: slliw a0, a0, 5
; RV64XTHEADBA-NEXT: ret
%c = shl i32 %a, 5
%d = shl i32 %b, 8
%e = add i32 %c, %d
ret i32 %e
}

define i64 @addshl64_5_8(i64 %a, i64 %b) {
; CHECK-LABEL: addshl64_5_8:
; CHECK: # %bb.0:
; CHECK-NEXT: slli a0, a0, 5
; CHECK-NEXT: slli a1, a1, 8
; CHECK-NEXT: add a0, a0, a1
; CHECK-NEXT: ret
; RV64I-LABEL: addshl64_5_8:
; RV64I: # %bb.0:
; RV64I-NEXT: slli a0, a0, 5
; RV64I-NEXT: slli a1, a1, 8
; RV64I-NEXT: add a0, a0, a1
; RV64I-NEXT: ret
;
; RV64XTHEADBA-LABEL: addshl64_5_8:
; RV64XTHEADBA: # %bb.0:
; RV64XTHEADBA-NEXT: th.addsl a0, a0, a1, 3
; RV64XTHEADBA-NEXT: slli a0, a0, 5
; RV64XTHEADBA-NEXT: ret
%c = shl i64 %a, 5
%d = shl i64 %b, 8
%e = add i64 %c, %d
Expand All @@ -1192,9 +1228,8 @@ define i64 @sh6_sh3_add1(i64 noundef %x, i64 noundef %y, i64 noundef %z) {
;
; RV64XTHEADBA-LABEL: sh6_sh3_add1:
; RV64XTHEADBA: # %bb.0: # %entry
; RV64XTHEADBA-NEXT: slli a1, a1, 6
; RV64XTHEADBA-NEXT: th.addsl a1, a1, a2, 3
; RV64XTHEADBA-NEXT: add a0, a1, a0
; RV64XTHEADBA-NEXT: th.addsl a1, a2, a1, 3
; RV64XTHEADBA-NEXT: th.addsl a0, a0, a1, 3
; RV64XTHEADBA-NEXT: ret
entry:
%shl = shl i64 %z, 3
Expand Down Expand Up @@ -1238,9 +1273,8 @@ define i64 @sh6_sh3_add3(i64 noundef %x, i64 noundef %y, i64 noundef %z) {
;
; RV64XTHEADBA-LABEL: sh6_sh3_add3:
; RV64XTHEADBA: # %bb.0: # %entry
; RV64XTHEADBA-NEXT: slli a1, a1, 6
; RV64XTHEADBA-NEXT: th.addsl a1, a1, a2, 3
; RV64XTHEADBA-NEXT: add a0, a0, a1
; RV64XTHEADBA-NEXT: th.addsl a1, a2, a1, 3
; RV64XTHEADBA-NEXT: th.addsl a0, a0, a1, 3
; RV64XTHEADBA-NEXT: ret
entry:
%shl = shl i64 %z, 3
Expand Down
4 changes: 2 additions & 2 deletions llvm/test/CodeGen/RISCV/xqciac.ll
Original file line number Diff line number Diff line change
Expand Up @@ -361,8 +361,8 @@ define dso_local i32 @shladdc1c2(i32 %a, i32 %b) local_unnamed_addr #0 {
;
; RV32IMXQCIAC-LABEL: shladdc1c2:
; RV32IMXQCIAC: # %bb.0: # %entry
; RV32IMXQCIAC-NEXT: qc.shladd a0, a0, a1, 5
; RV32IMXQCIAC-NEXT: slli a0, a0, 26
; RV32IMXQCIAC-NEXT: slli a1, a1, 26
; RV32IMXQCIAC-NEXT: qc.shladd a0, a0, a1, 31
; RV32IMXQCIAC-NEXT: ret
;
; RV32IZBAMXQCIAC-LABEL: shladdc1c2:
Expand Down
Loading