Skip to content

Commit be6296e

Browse files
authored
[RISCV] Fold Zba-expanded (mul (shr exact X, C1), C2) (llvm#168019)
1 parent 2b22e9b commit be6296e

File tree

3 files changed

+82
-5
lines changed

3 files changed

+82
-5
lines changed

llvm/include/llvm/CodeGen/SDPatternMatch.h

Lines changed: 5 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -903,6 +903,11 @@ template <typename LHS, typename RHS>
903903
inline BinaryOpc_match<LHS, RHS> m_Srl(const LHS &L, const RHS &R) {
904904
return BinaryOpc_match<LHS, RHS>(ISD::SRL, L, R);
905905
}
906+
/// Build a matcher that accepts a right shift of either kind: an ISD::SRA or
/// an ISD::SRL BinaryOpc_match, each constructed with SDNodeFlags::Exact.
/// In this commit it replaces the hand-written
/// `m_AnyOf(m_Sra(...), m_Srl(...)) && X->getFlags().hasExact()` check, where
/// \p L is the pattern for the shifted value and \p R the pattern for the
/// shift amount.
/// NOTE(review): presumably BinaryOpc_match rejects nodes that lack the
/// flags passed to it -- confirm against its definition.
template <typename LHS, typename RHS>
907+
inline auto m_ExactSr(const LHS &L, const RHS &R) {
908+
return m_AnyOf(BinaryOpc_match<LHS, RHS>(ISD::SRA, L, R, SDNodeFlags::Exact),
909+
BinaryOpc_match<LHS, RHS>(ISD::SRL, L, R, SDNodeFlags::Exact));
910+
}
906911

907912
template <typename LHS, typename RHS>
908913
inline BinaryOpc_match<LHS, RHS> m_Rotl(const LHS &L, const RHS &R) {

llvm/lib/Target/RISCV/RISCVISelLowering.cpp

Lines changed: 6 additions & 5 deletions
Original file line numberDiff line numberDiff line change
@@ -16798,9 +16798,7 @@ static SDValue expandMulToAddOrSubOfShl(SDNode *N, SelectionDAG &DAG,
1679816798
// because X is exact (Y >> M + 2).
1679916799
uint64_t ShAmt = Log2_64(MulAmtLowBit) + 2;
1680016800
using namespace SDPatternMatch;
16801-
return sd_match(X, m_AnyOf(m_Sra(m_Value(), m_SpecificInt(ShAmt)),
16802-
m_Srl(m_Value(), m_SpecificInt(ShAmt)))) &&
16803-
X->getFlags().hasExact();
16801+
return sd_match(X, m_ExactSr(m_Value(), m_SpecificInt(ShAmt)));
1680416802
};
1680516803
if (isPowerOf2_64(MulAmt - MulAmtLowBit) && !(CanSub && PreferSub())) {
1680616804
Op = ISD::ADD;
@@ -16825,10 +16823,13 @@ static SDValue getShlAddShlAdd(SDNode *N, SelectionDAG &DAG, unsigned ShX,
1682516823
SDLoc DL(N);
1682616824
EVT VT = N->getValueType(0);
1682716825
SDValue X = N->getOperand(0);
16828-
// Put the shift first if we can fold a zext into the shift forming a slli.uw.
16826+
// Put the shift first if we can fold:
16827+
// a. a zext into the shift forming a slli.uw
16828+
// b. an exact shift right forming one shorter shift or no shift at all
1682916829
using namespace SDPatternMatch;
1683016830
if (Shift != 0 &&
16831-
sd_match(X, m_And(m_Value(), m_SpecificInt(UINT64_C(0xffffffff))))) {
16831+
sd_match(X, m_AnyOf(m_And(m_Value(), m_SpecificInt(UINT64_C(0xffffffff))),
16832+
m_ExactSr(m_Value(), m_ConstInt())))) {
1683216833
X = DAG.getNode(ISD::SHL, DL, VT, X, DAG.getConstant(Shift, DL, VT));
1683316834
Shift = 0;
1683416835
}

llvm/test/CodeGen/RISCV/rv64zba.ll

Lines changed: 71 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -5016,3 +5016,74 @@ define ptr @shl_add_knownbits(ptr %p, i64 %i) {
50165016
%r = getelementptr i8, ptr %p, i64 %shr
50175017
ret ptr %r
50185018
}
5019+
5020+
; Multiplies an `ashr exact` by 1 with the constant 6. The expected output for
; every check prefix contains no right shift and no mul: plain RV64I uses
; slli+add, while Zba / XAndesPerf use a single shift-add instruction.
define i64 @exactashr1mul6(i64 %a) {
5021+
; RV64I-LABEL: exactashr1mul6:
5022+
; RV64I: # %bb.0:
5023+
; RV64I-NEXT: slli a1, a0, 1
5024+
; RV64I-NEXT: add a0, a1, a0
5025+
; RV64I-NEXT: ret
5026+
;
5027+
; RV64ZBA-LABEL: exactashr1mul6:
5028+
; RV64ZBA: # %bb.0:
5029+
; RV64ZBA-NEXT: sh1add a0, a0, a0
5030+
; RV64ZBA-NEXT: ret
5031+
;
5032+
; RV64XANDESPERF-LABEL: exactashr1mul6:
5033+
; RV64XANDESPERF: # %bb.0:
5034+
; RV64XANDESPERF-NEXT: nds.lea.h a0, a0, a0
5035+
; RV64XANDESPERF-NEXT: ret
5036+
%c = ashr exact i64 %a, 1
5037+
%d = mul i64 %c, 6
5038+
ret i64 %d
5039+
}
5040+
5041+
; Multiplies an `lshr exact` by 3 with the constant 22. With Zba / XAndesPerf
; the expected right shift is shortened from 3 to 2 and is followed by two
; shift-add instructions; plain RV64I keeps the srli by 3 and a real mul by 22.
define i64 @exactlshr3mul22(i64 %a) {
5042+
; RV64I-LABEL: exactlshr3mul22:
5043+
; RV64I: # %bb.0:
5044+
; RV64I-NEXT: srli a0, a0, 3
5045+
; RV64I-NEXT: li a1, 22
5046+
; RV64I-NEXT: mul a0, a0, a1
5047+
; RV64I-NEXT: ret
5048+
;
5049+
; RV64ZBA-LABEL: exactlshr3mul22:
5050+
; RV64ZBA: # %bb.0:
5051+
; RV64ZBA-NEXT: srli a0, a0, 2
5052+
; RV64ZBA-NEXT: sh2add a1, a0, a0
5053+
; RV64ZBA-NEXT: sh1add a0, a1, a0
5054+
; RV64ZBA-NEXT: ret
5055+
;
5056+
; RV64XANDESPERF-LABEL: exactlshr3mul22:
5057+
; RV64XANDESPERF: # %bb.0:
5058+
; RV64XANDESPERF-NEXT: srli a0, a0, 2
5059+
; RV64XANDESPERF-NEXT: nds.lea.w a1, a0, a0
5060+
; RV64XANDESPERF-NEXT: nds.lea.h a0, a0, a1
5061+
; RV64XANDESPERF-NEXT: ret
5062+
%c = lshr exact i64 %a, 3
5063+
%d = mul i64 %c, 22
5064+
ret i64 %d
5065+
}
5066+
5067+
; Multiplies an `ashr exact` by 1 with the constant 36. With Zba / XAndesPerf
; the expected output is a left shift by 1 followed by one shift-add
; instruction; plain RV64I uses two slli and an add. No prefix expects a right
; shift or a mul.
define i64 @exactashr1mul36(i64 %a) {
5068+
; RV64I-LABEL: exactashr1mul36:
5069+
; RV64I: # %bb.0:
5070+
; RV64I-NEXT: slli a1, a0, 1
5071+
; RV64I-NEXT: slli a0, a0, 4
5072+
; RV64I-NEXT: add a0, a0, a1
5073+
; RV64I-NEXT: ret
5074+
;
5075+
; RV64ZBA-LABEL: exactashr1mul36:
5076+
; RV64ZBA: # %bb.0:
5077+
; RV64ZBA-NEXT: slli a0, a0, 1
5078+
; RV64ZBA-NEXT: sh3add a0, a0, a0
5079+
; RV64ZBA-NEXT: ret
5080+
;
5081+
; RV64XANDESPERF-LABEL: exactashr1mul36:
5082+
; RV64XANDESPERF: # %bb.0:
5083+
; RV64XANDESPERF-NEXT: slli a0, a0, 1
5084+
; RV64XANDESPERF-NEXT: nds.lea.d a0, a0, a0
5085+
; RV64XANDESPERF-NEXT: ret
5086+
%c = ashr exact i64 %a, 1
5087+
%d = mul i64 %c, 36
5088+
ret i64 %d
5089+
}

0 commit comments

Comments
 (0)