Skip to content

Commit fc79e27

Browse files
committed
[SelectionDAG] Use Magic Algorithm for Splitting UDIV/UREM by Constant
For integer types twice as large as a legal type, we previously generated a library call if no other splitting technique was applicable. With this change, we use an adaptation of the Magic algorithm, which is also used for UDIV/UREM by constants on legal types. The implementation introduced here is a straightforward port of the existing implementation to types twice the size of a legal type. The core idea of this algorithm is to replace (udiv x c) for a constant c with the bits at or above the s-th bit of the product of x and (2^s + o)/c for some s and o. More details are available in Henry S. Warren, Jr.: "Hacker's Delight", chapter 10. Efficient handling of UDIV/UREM by constants on types twice as large as a legal type is mostly relevant for 32-bit platforms, but some projects may also benefit on 64-bit platforms. For example, the `fmt` library for C++ uses 128-bit unsigned divisions by 100 and 10000, which were not covered by the previously existing optimizations. Closes #137514.
1 parent ea788d4 commit fc79e27

File tree

16 files changed

+2396
-943
lines changed

16 files changed

+2396
-943
lines changed

llvm/include/llvm/CodeGen/TargetLowering.h

Lines changed: 5 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -5844,6 +5844,11 @@ class LLVM_ABI TargetLowering : public TargetLoweringBase {
58445844
bool expandUDIVREMByConstantViaUREMDecomposition(
58455845
SDNode *N, APInt Divisor, SmallVectorImpl<SDValue> &Result, EVT HiLoVT,
58465846
SelectionDAG &DAG, SDValue LL, SDValue LH) const;
5847+
5848+
/// Expand the UDIV/UREM/UDIVREM node \p N by the constant \p Divisor into
/// operations on \p HiLoVT halves, using the multiply-by-magic-constant
/// technique (UnsignedDivisionByConstantInfo).
///
/// \param Divisor constant divisor; must not be 1.
/// \param Result on success receives the low and high halves of the quotient
///        (unless \p N is a UREM), followed by the low and high halves of the
///        remainder (unless \p N is a UDIV).
/// \param LL, LH optional low/high halves of the dividend. Either both or
///        neither must be provided; when absent, operand 0 of \p N is split.
/// \returns false if a required wide multiplication could not be expanded.
bool expandUDIVREMByConstantViaUMulHiMagic(SDNode *N, const APInt &Divisor,
5849+
SmallVectorImpl<SDValue> &Result,
5850+
EVT HiLoVT, SelectionDAG &DAG,
5851+
SDValue LL, SDValue LH) const;
58475852
};
58485853

58495854
/// Given an LLVM IR type and return type attributes, compute the return value

llvm/lib/CodeGen/SelectionDAG/TargetLowering.cpp

Lines changed: 111 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -8137,6 +8137,113 @@ bool TargetLowering::expandUDIVREMByConstantViaUREMDecomposition(
81378137
return true;
81388138
}
81398139

8140+
/// Expand a UDIV/UREM/UDIVREM by the constant \p Divisor on a value that is
/// handled as a (low, high) pair of \p HiLoVT halves, using the
/// multiply-by-magic-constant technique:
///   Q = umulhi(N0 >> PreShift, Magic)
///   if (IsAdd) Q = ((N0 - Q) >> 1) + Q
///   Q >>= PostShift
///   R = N0 - Q * Divisor
///
/// On success, \p Result receives the low/high halves of the quotient (unless
/// \p N is a UREM) followed by the low/high halves of the remainder (unless
/// \p N is a UDIV). On failure (a required wide multiplication could not be
/// expanded) the function returns false and leaves \p Result untouched.
///
/// \p LL and \p LH are the optional, already split halves of the dividend;
/// either both or neither must be provided.
bool TargetLowering::expandUDIVREMByConstantViaUMulHiMagic(
    SDNode *N, const APInt &Divisor, SmallVectorImpl<SDValue> &Result,
    EVT HiLoVT, SelectionDAG &DAG, SDValue LL, SDValue LH) const {

  SDValue N0 = N->getOperand(0);
  EVT VT = N0->getValueType(0);
  SDLoc DL{N};

  assert(!Divisor.isOne() && "Magic algorithm does not work for division by 1");

  // This helper multiplies the pair (LoIn, HiIn) by a constant. The parameters
  // are deliberately not named LL/LH so that they do not shadow the dividend
  // halves of the enclosing function.
  auto MakeMUL_LOHIByConst = [&](unsigned Opc, SDValue LoIn, SDValue HiIn,
                                 const APInt &Const,
                                 SmallVectorImpl<SDValue> &MulOut) {
    SDValue LHS = DAG.getNode(ISD::BUILD_PAIR, DL, VT, LoIn, HiIn);
    SDValue RHS = DAG.getConstant(Const, DL, VT);
    auto [RL, RH] = DAG.SplitScalar(RHS, DL, HiLoVT, HiLoVT);
    return expandMUL_LOHI(Opc, VT, DL, LHS, RHS, MulOut, HiLoVT, DAG,
                          TargetLowering::MulExpansionKind::OnlyLegalOrCustom,
                          LoIn, HiIn, RL, RH);
  };

  // This helper creates an ADD/SUB of the pairs (AL, AH) and (BL, BH). The
  // carry/borrow out of the low halves is propagated into the high halves.
  auto MakeAddSubLong = [&](unsigned Opc, SDValue AL, SDValue AH, SDValue BL,
                            SDValue BH) {
    SDValue AddSubNode =
        DAG.getNode(Opc == ISD::ADD ? ISD::UADDO : ISD::USUBO, DL,
                    DAG.getVTList(HiLoVT, MVT::i1), AL, BL);
    SDValue OutL, OutH, Overflow;
    expandUADDSUBO(AddSubNode.getNode(), OutL, Overflow, DAG);
    SDValue WithOverflow = DAG.getNode(
        Opc, DL, HiLoVT, AH, DAG.getZExtOrTrunc(Overflow, DL, HiLoVT));
    OutH = DAG.getNode(Opc, DL, HiLoVT, WithOverflow, BH);
    return std::make_pair(OutL, OutH);
  };

  // This helper creates a logical right shift of the pair (LoIn, HiIn) by the
  // constant Shift.
  auto MakeSRLLong = [&](SDValue LoIn, SDValue HiIn, unsigned Shift) {
    unsigned HBitWidth = HiLoVT.getScalarSizeInBits();
    if (Shift < HBitWidth) {
      // Bits shifted out of the high half are funnelled into the low half.
      SDValue ShAmt = DAG.getShiftAmountConstant(Shift, HiLoVT, DL);
      SDValue ResL = DAG.getNode(ISD::FSHR, DL, HiLoVT, HiIn, LoIn, ShAmt);
      SDValue ResH = DAG.getNode(ISD::SRL, DL, HiLoVT, HiIn, ShAmt);
      return std::make_pair(ResL, ResH);
    }
    SDValue Zero = DAG.getConstant(0, DL, HiLoVT);
    if (Shift == HBitWidth)
      return std::make_pair(HiIn, Zero);
    assert(Shift - HBitWidth < HBitWidth &&
           "We shouldn't generate an undefined shift");
    SDValue ShAmt = DAG.getShiftAmountConstant(Shift - HBitWidth, HiLoVT, DL);
    return std::make_pair(DAG.getNode(ISD::SRL, DL, HiLoVT, HiIn, ShAmt), Zero);
  };

  // Knowledge of leading zeros may help to reduce the multiplier.
  unsigned KnownLeadingZeros = DAG.computeKnownBits(N0).countMinLeadingZeros();

  UnsignedDivisionByConstantInfo Magics = UnsignedDivisionByConstantInfo::get(
      Divisor, std::min(KnownLeadingZeros, Divisor.countl_zero()));

  assert(!LL == !LH && "Expected both input halves or no input halves!");
  if (!LL)
    std::tie(LL, LH) = DAG.SplitScalar(N0, DL, HiLoVT, HiLoVT);

  SDValue QL = LL;
  SDValue QH = LH;
  if (Magics.PreShift != 0)
    std::tie(QL, QH) = MakeSRLLong(QL, QH, Magics.PreShift);

  // The UMUL_LOHI expansion yields four HiLoVT parts; the upper two are read
  // below, so give the vector enough inline storage for all of them.
  SmallVector<SDValue, 4> UMulResult;
  if (!MakeMUL_LOHIByConst(ISD::UMUL_LOHI, QL, QH, Magics.Magic, UMulResult))
    return false;

  assert(UMulResult.size() == 4 && "Expected four parts from UMUL_LOHI");

  // Only the high half of the double-width product is needed.
  QL = UMulResult[2];
  QH = UMulResult[3];

  if (Magics.IsAdd) {
    // Q = ((N0 - Q) >> 1) + Q
    auto [NPQL, NPQH] = MakeAddSubLong(ISD::SUB, LL, LH, QL, QH);
    std::tie(NPQL, NPQH) = MakeSRLLong(NPQL, NPQH, 1);
    std::tie(QL, QH) = MakeAddSubLong(ISD::ADD, NPQL, NPQH, QL, QH);
  }

  if (Magics.PostShift != 0)
    std::tie(QL, QH) = MakeSRLLong(QL, QH, Magics.PostShift);

  unsigned Opcode = N->getOpcode();

  // Compute the remainder before touching Result: the multiplication may fail
  // to expand, and in that case Result must not contain a stale quotient.
  SDValue RemL, RemH;
  if (Opcode != ISD::UDIV) {
    SmallVector<SDValue, 2> MulResult;
    if (!MakeMUL_LOHIByConst(ISD::MUL, QL, QH, Divisor, MulResult))
      return false;

    assert(MulResult.size() == 2);

    // R = N0 - Q * Divisor
    std::tie(RemL, RemH) =
        MakeAddSubLong(ISD::SUB, LL, LH, MulResult[0], MulResult[1]);
  }

  if (Opcode != ISD::UREM) {
    Result.push_back(QL);
    Result.push_back(QH);
  }

  if (Opcode != ISD::UDIV) {
    Result.push_back(RemL);
    Result.push_back(RemH);
  }

  return true;
}
8246+
81408247
bool TargetLowering::expandDIVREMByConstant(SDNode *N,
81418248
SmallVectorImpl<SDValue> &Result,
81428249
EVT HiLoVT, SelectionDAG &DAG,
@@ -8174,6 +8281,10 @@ bool TargetLowering::expandDIVREMByConstant(SDNode *N,
81748281
DAG, LL, LH))
81758282
return true;
81768283

8284+
if (expandUDIVREMByConstantViaUMulHiMagic(N, Divisor, Result, HiLoVT, DAG, LL,
8285+
LH))
8286+
return true;
8287+
81778288
return false;
81788289
}
81798290

0 commit comments

Comments
 (0)