Skip to content

Commit 3a608ef

Browse files
committed
[TargetLowering] Inline one of the signatures of forceExpandWideMul into its callers. NFC
There are two call sites. One uses the non-libcall part and the other uses the libcall part. Sink those pieces into their callers. After this, I'm going to merge the non-libcall part of the other forceExpandWideMul with the code from LegalizeIntegerTypes into a new helper.
1 parent 886adf8 commit 3a608ef

File tree

3 files changed

+64
-103
lines changed

3 files changed

+64
-103
lines changed

llvm/include/llvm/CodeGen/TargetLowering.h

Lines changed: 4 additions & 14 deletions
Original file line numberDiff line numberDiff line change
@@ -5499,20 +5499,10 @@ class TargetLowering : public TargetLoweringBase {
54995499
bool expandMULO(SDNode *Node, SDValue &Result, SDValue &Overflow,
55005500
SelectionDAG &DAG) const;
55015501

5502-
/// forceExpandWideMUL - Unconditionally expand a MUL into either a libcall or
5503-
/// brute force via a wide multiplication. The expansion works by
5504-
/// attempting to do a multiplication on a wider type twice the size of the
5505-
/// original operands. LL and LH represent the lower and upper halves of the
5506-
/// first operand. RL and RH represent the lower and upper halves of the
5507-
/// second operand. The upper and lower halves of the result are stored in Lo
5508-
/// and Hi.
5509-
void forceExpandWideMUL(SelectionDAG &DAG, const SDLoc &dl, bool Signed,
5510-
EVT WideVT, const SDValue LL, const SDValue LH,
5511-
const SDValue RL, const SDValue RH, SDValue &Lo,
5512-
SDValue &Hi) const;
5513-
5514-
/// Same as above, but creates the upper halves of each operand by
5515-
/// sign/zero-extending the operands.
5502+
/// Calculate full product of LHS and RHS either via a libcall or through
5503+
/// brute force expansion of the multiplication. The expansion works by
5504+
/// splitting the 2 inputs into 4 pieces that we can multiply and add together
5505+
/// without needing MULH or MUL_LOHI.
55165506
void forceExpandWideMUL(SelectionDAG &DAG, const SDLoc &dl, bool Signed,
55175507
const SDValue LHS, const SDValue RHS, SDValue &Lo,
55185508
SDValue &Hi) const;

llvm/lib/CodeGen/SelectionDAG/LegalizeIntegerTypes.cpp

Lines changed: 38 additions & 4 deletions
Original file line numberDiff line numberDiff line change
@@ -4294,10 +4294,44 @@ void DAGTypeLegalizer::ExpandIntRes_MUL(SDNode *N,
42944294
LC = RTLIB::MUL_I128;
42954295

42964296
if (LC == RTLIB::UNKNOWN_LIBCALL || !TLI.getLibcallName(LC)) {
4297-
// Perform a wide multiplication where the wide type is the original VT and
4298-
// the 4 parts are the split arguments.
4299-
TLI.forceExpandWideMUL(DAG, dl, /*Signed=*/true, VT, LL, LH, RL, RH, Lo,
4300-
Hi);
4297+
// We'll expand the multiplication by brute force because we have no other
4298+
// options. This is a trivially-generalized version of the code from
4299+
// Hacker's Delight (itself derived from Knuth's Algorithm M from section
4300+
// 4.3.1).
4301+
unsigned Bits = NVT.getSizeInBits();
4302+
unsigned HalfBits = Bits >> 1;
4303+
SDValue Mask = DAG.getConstant(APInt::getLowBitsSet(Bits, HalfBits), dl,
4304+
NVT);
4305+
SDValue LLL = DAG.getNode(ISD::AND, dl, NVT, LL, Mask);
4306+
SDValue RLL = DAG.getNode(ISD::AND, dl, NVT, RL, Mask);
4307+
4308+
SDValue T = DAG.getNode(ISD::MUL, dl, NVT, LLL, RLL);
4309+
SDValue TL = DAG.getNode(ISD::AND, dl, NVT, T, Mask);
4310+
4311+
SDValue Shift = DAG.getShiftAmountConstant(HalfBits, NVT, dl);
4312+
SDValue TH = DAG.getNode(ISD::SRL, dl, NVT, T, Shift);
4313+
SDValue LLH = DAG.getNode(ISD::SRL, dl, NVT, LL, Shift);
4314+
SDValue RLH = DAG.getNode(ISD::SRL, dl, NVT, RL, Shift);
4315+
4316+
SDValue U = DAG.getNode(ISD::ADD, dl, NVT,
4317+
DAG.getNode(ISD::MUL, dl, NVT, LLH, RLL), TH);
4318+
SDValue UL = DAG.getNode(ISD::AND, dl, NVT, U, Mask);
4319+
SDValue UH = DAG.getNode(ISD::SRL, dl, NVT, U, Shift);
4320+
4321+
SDValue V = DAG.getNode(ISD::ADD, dl, NVT,
4322+
DAG.getNode(ISD::MUL, dl, NVT, LLL, RLH), UL);
4323+
SDValue VH = DAG.getNode(ISD::SRL, dl, NVT, V, Shift);
4324+
4325+
SDValue W = DAG.getNode(ISD::ADD, dl, NVT,
4326+
DAG.getNode(ISD::MUL, dl, NVT, LLH, RLH),
4327+
DAG.getNode(ISD::ADD, dl, NVT, UH, VH));
4328+
Lo = DAG.getNode(ISD::ADD, dl, NVT, TL,
4329+
DAG.getNode(ISD::SHL, dl, NVT, V, Shift));
4330+
4331+
Hi = DAG.getNode(ISD::ADD, dl, NVT, W,
4332+
DAG.getNode(ISD::ADD, dl, NVT,
4333+
DAG.getNode(ISD::MUL, dl, NVT, RH, LL),
4334+
DAG.getNode(ISD::MUL, dl, NVT, RL, LH)));
43014335
return;
43024336
}
43034337

llvm/lib/CodeGen/SelectionDAG/TargetLowering.cpp

Lines changed: 22 additions & 85 deletions
Original file line numberDiff line numberDiff line change
@@ -10858,14 +10858,14 @@ SDValue TargetLowering::expandShlSat(SDNode *Node, SelectionDAG &DAG) const {
1085810858
}
1085910859

1086010860
void TargetLowering::forceExpandWideMUL(SelectionDAG &DAG, const SDLoc &dl,
10861-
bool Signed, EVT WideVT,
10862-
const SDValue LL, const SDValue LH,
10863-
const SDValue RL, const SDValue RH,
10864-
SDValue &Lo, SDValue &Hi) const {
10861+
bool Signed, const SDValue LHS,
10862+
const SDValue RHS, SDValue &Lo,
10863+
SDValue &Hi) const {
10864+
EVT VT = LHS.getValueType();
10865+
assert(RHS.getValueType() == VT && "Mismatching operand types");
10866+
EVT WideVT = EVT::getIntegerVT(*DAG.getContext(), VT.getSizeInBits() * 2);
1086510867
// We can fall back to a libcall with an illegal type for the MUL if we
1086610868
// have a libcall big enough.
10867-
// Also, we can fall back to a division in some cases, but that's a big
10868-
// performance hit in the general case.
1086910869
RTLIB::Libcall LC = RTLIB::UNKNOWN_LIBCALL;
1087010870
if (WideVT == MVT::i16)
1087110871
LC = RTLIB::MUL_I16;
@@ -10876,47 +10876,20 @@ void TargetLowering::forceExpandWideMUL(SelectionDAG &DAG, const SDLoc &dl,
1087610876
else if (WideVT == MVT::i128)
1087710877
LC = RTLIB::MUL_I128;
1087810878

10879-
if (LC == RTLIB::UNKNOWN_LIBCALL || !getLibcallName(LC)) {
10880-
// We'll expand the multiplication by brute force because we have no other
10881-
// options. This is a trivially-generalized version of the code from
10882-
// Hacker's Delight (itself derived from Knuth's Algorithm M from section
10883-
// 4.3.1).
10884-
EVT VT = LL.getValueType();
10885-
unsigned Bits = VT.getSizeInBits();
10886-
unsigned HalfBits = Bits >> 1;
10887-
SDValue Mask =
10888-
DAG.getConstant(APInt::getLowBitsSet(Bits, HalfBits), dl, VT);
10889-
SDValue LLL = DAG.getNode(ISD::AND, dl, VT, LL, Mask);
10890-
SDValue RLL = DAG.getNode(ISD::AND, dl, VT, RL, Mask);
10891-
10892-
SDValue T = DAG.getNode(ISD::MUL, dl, VT, LLL, RLL);
10893-
SDValue TL = DAG.getNode(ISD::AND, dl, VT, T, Mask);
10894-
10895-
SDValue Shift = DAG.getShiftAmountConstant(HalfBits, VT, dl);
10896-
SDValue TH = DAG.getNode(ISD::SRL, dl, VT, T, Shift);
10897-
SDValue LLH = DAG.getNode(ISD::SRL, dl, VT, LL, Shift);
10898-
SDValue RLH = DAG.getNode(ISD::SRL, dl, VT, RL, Shift);
10899-
10900-
SDValue U = DAG.getNode(ISD::ADD, dl, VT,
10901-
DAG.getNode(ISD::MUL, dl, VT, LLH, RLL), TH);
10902-
SDValue UL = DAG.getNode(ISD::AND, dl, VT, U, Mask);
10903-
SDValue UH = DAG.getNode(ISD::SRL, dl, VT, U, Shift);
10904-
10905-
SDValue V = DAG.getNode(ISD::ADD, dl, VT,
10906-
DAG.getNode(ISD::MUL, dl, VT, LLL, RLH), UL);
10907-
SDValue VH = DAG.getNode(ISD::SRL, dl, VT, V, Shift);
10908-
10909-
SDValue W =
10910-
DAG.getNode(ISD::ADD, dl, VT, DAG.getNode(ISD::MUL, dl, VT, LLH, RLH),
10911-
DAG.getNode(ISD::ADD, dl, VT, UH, VH));
10912-
Lo = DAG.getNode(ISD::ADD, dl, VT, TL,
10913-
DAG.getNode(ISD::SHL, dl, VT, V, Shift));
10914-
10915-
Hi = DAG.getNode(ISD::ADD, dl, VT, W,
10916-
DAG.getNode(ISD::ADD, dl, VT,
10917-
DAG.getNode(ISD::MUL, dl, VT, RH, LL),
10918-
DAG.getNode(ISD::MUL, dl, VT, RL, LH)));
10919-
} else {
10879+
if (LC != RTLIB::UNKNOWN_LIBCALL && getLibcallName(LC)) {
10880+
SDValue HiLHS, HiRHS;
10881+
if (Signed) {
10882+
// The high part is obtained by SRA'ing all but one of the bits of low
10883+
// part.
10884+
unsigned LoSize = VT.getFixedSizeInBits();
10885+
SDValue Shift = DAG.getShiftAmountConstant(LoSize - 1, VT, dl);
10886+
HiLHS = DAG.getNode(ISD::SRA, dl, VT, LHS, Shift);
10887+
HiRHS = DAG.getNode(ISD::SRA, dl, VT, RHS, Shift);
10888+
} else {
10889+
HiLHS = DAG.getConstant(0, dl, VT);
10890+
HiRHS = DAG.getConstant(0, dl, VT);
10891+
}
10892+
1092010893
// Attempt a libcall.
1092110894
SDValue Ret;
1092210895
TargetLowering::MakeLibCallOptions CallOptions;
@@ -10927,10 +10900,10 @@ void TargetLowering::forceExpandWideMUL(SelectionDAG &DAG, const SDLoc &dl,
1092710900
// depending on platform endianness. This is usually handled by
1092810901
// the C calling convention, but we can't defer to it in
1092910902
// the legalizer.
10930-
SDValue Args[] = {LL, LH, RL, RH};
10903+
SDValue Args[] = {LHS, HiLHS, RHS, HiRHS};
1093110904
Ret = makeLibCall(DAG, LC, WideVT, Args, CallOptions, dl).first;
1093210905
} else {
10933-
SDValue Args[] = {LH, LL, RH, RL};
10906+
SDValue Args[] = {HiLHS, LHS, HiRHS, RHS};
1093410907
Ret = makeLibCall(DAG, LC, WideVT, Args, CallOptions, dl).first;
1093510908
}
1093610909
assert(Ret.getOpcode() == ISD::MERGE_VALUES &&
@@ -10943,42 +10916,6 @@ void TargetLowering::forceExpandWideMUL(SelectionDAG &DAG, const SDLoc &dl,
1094310916
Lo = Ret.getOperand(1);
1094410917
Hi = Ret.getOperand(0);
1094510918
}
10946-
}
10947-
}
10948-
10949-
void TargetLowering::forceExpandWideMUL(SelectionDAG &DAG, const SDLoc &dl,
10950-
bool Signed, const SDValue LHS,
10951-
const SDValue RHS, SDValue &Lo,
10952-
SDValue &Hi) const {
10953-
EVT VT = LHS.getValueType();
10954-
assert(RHS.getValueType() == VT && "Mismatching operand types");
10955-
EVT WideVT = EVT::getIntegerVT(*DAG.getContext(), VT.getSizeInBits() * 2);
10956-
// We can fall back to a libcall with an illegal type for the MUL if we
10957-
// have a libcall big enough.
10958-
RTLIB::Libcall LC = RTLIB::UNKNOWN_LIBCALL;
10959-
if (WideVT == MVT::i16)
10960-
LC = RTLIB::MUL_I16;
10961-
else if (WideVT == MVT::i32)
10962-
LC = RTLIB::MUL_I32;
10963-
else if (WideVT == MVT::i64)
10964-
LC = RTLIB::MUL_I64;
10965-
else if (WideVT == MVT::i128)
10966-
LC = RTLIB::MUL_I128;
10967-
10968-
if (LC != RTLIB::UNKNOWN_LIBCALL && getLibcallName(LC)) {
10969-
SDValue HiLHS, HiRHS;
10970-
if (Signed) {
10971-
// The high part is obtained by SRA'ing all but one of the bits of low
10972-
// part.
10973-
unsigned LoSize = VT.getFixedSizeInBits();
10974-
SDValue Shift = DAG.getShiftAmountConstant(LoSize - 1, VT, dl);
10975-
HiLHS = DAG.getNode(ISD::SRA, dl, VT, LHS, Shift);
10976-
HiRHS = DAG.getNode(ISD::SRA, dl, VT, RHS, Shift);
10977-
} else {
10978-
HiLHS = DAG.getConstant(0, dl, VT);
10979-
HiRHS = DAG.getConstant(0, dl, VT);
10980-
}
10981-
forceExpandWideMUL(DAG, dl, Signed, WideVT, LHS, HiLHS, RHS, HiRHS, Lo, Hi);
1098210919
return;
1098310920
}
1098410921

0 commit comments

Comments
 (0)