Skip to content

Commit 5bdd489

Browse files
committed
[SelectionDAG] Use Magic Algorithm for Splitting UDIV/UREM by Constant
For integer types twice as large as a legal type, we previously generated a library call if no other splitting technique was applicable. With this change, we use an adaptation of the Magic algorithm, which is already used for UDIV/UREM by constants on legal types. The implementation introduced here is a straightforward port of the existing implementation to types twice the size of a legal type. The core idea of the algorithm is to replace (udiv x c), for a constant c, with the bits at position s and above of the product of x and (2^s + o)/c, for some s and o. More details are available in Henry S. Warren, Jr.: "Hacker's Delight", chapter 10. Efficient handling of UDIV/UREM by constants on types twice as large as a legal type is mostly relevant for 32-bit platforms, but some projects may also benefit on 64-bit platforms. For example, the `fmt` library for C++ uses 128-bit unsigned divisions by 100 and 10000, which were not covered by the previously existing optimizations. Closes #137514.
1 parent 04d4d45 commit 5bdd489

File tree

15 files changed

+2393
-944
lines changed

15 files changed

+2393
-944
lines changed

llvm/lib/CodeGen/SelectionDAG/TargetLowering.cpp

Lines changed: 113 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -8137,6 +8137,115 @@ static bool expandUDIVREMByConstantViaUREMDecomposition(
81378137
return true;
81388138
}
81398139

8140+
static bool
8141+
expandUDIVREMByConstantViaUMulHiMagic(SDNode *N, const APInt &Divisor,
8142+
SmallVectorImpl<SDValue> &Result,
8143+
EVT HiLoVT, SelectionDAG &DAG, SDValue LL,
8144+
SDValue LH, const TargetLowering &TLI) {
8145+
8146+
SDValue N0 = N->getOperand(0);
8147+
EVT VT = N0->getValueType(0);
8148+
SDLoc DL{N};
8149+
8150+
assert(!Divisor.isOne() && "Magic algorithm does not work for division by 1");
8151+
8152+
// This helper creates a MUL_LOHI of the pair (LL, LH) by a constant.
8153+
auto MakeMUL_LOHIByConst = [&](unsigned Opc, SDValue LL, SDValue LH,
8154+
const APInt &Const,
8155+
SmallVectorImpl<SDValue> &Result) {
8156+
SDValue LHS = DAG.getNode(ISD::BUILD_PAIR, DL, VT, LL, LH);
8157+
SDValue RHS = DAG.getConstant(Const, DL, VT);
8158+
auto [RL, RH] = DAG.SplitScalar(RHS, DL, HiLoVT, HiLoVT);
8159+
return TLI.expandMUL_LOHI(
8160+
Opc, VT, DL, LHS, RHS, Result, HiLoVT, DAG,
8161+
TargetLowering::MulExpansionKind::OnlyLegalOrCustom, LL, LH, RL, RH);
8162+
};
8163+
8164+
// This helper creates an ADD/SUB of the pairs (LL, LH) and (RL, RH).
8165+
auto MakeAddSubLong = [&](unsigned Opc, SDValue LL, SDValue LH, SDValue RL,
8166+
SDValue RH) {
8167+
SDValue AddSubNode =
8168+
DAG.getNode(Opc == ISD::ADD ? ISD::UADDO : ISD::USUBO, DL,
8169+
DAG.getVTList(HiLoVT, MVT::i1), LL, RL);
8170+
SDValue OutL, OutH, Overflow;
8171+
TLI.expandUADDSUBO(AddSubNode.getNode(), OutL, Overflow, DAG);
8172+
SDValue WithOverflow = DAG.getNode(
8173+
Opc, DL, HiLoVT, LH, DAG.getZExtOrTrunc(Overflow, DL, HiLoVT));
8174+
OutH = DAG.getNode(Opc, DL, HiLoVT, WithOverflow, RH);
8175+
return std::make_pair(OutL, OutH);
8176+
};
8177+
8178+
// This helper creates a SRL of the pair (LL, LH) by Shift.
8179+
auto MakeSRLLong = [&](SDValue LL, SDValue LH, unsigned Shift) {
8180+
unsigned HBitWidth = HiLoVT.getScalarSizeInBits();
8181+
if (Shift < HBitWidth) {
8182+
SDValue ShAmt = DAG.getConstant(Shift, DL, HiLoVT);
8183+
SDValue ResL = DAG.getNode(ISD::FSHR, DL, HiLoVT, LH, LL, ShAmt);
8184+
SDValue ResH = DAG.getNode(ISD::SRL, DL, HiLoVT, LH, ShAmt);
8185+
return std::make_pair(ResL, ResH);
8186+
}
8187+
SDValue Zero = DAG.getConstant(0, DL, HiLoVT);
8188+
if (Shift == HBitWidth)
8189+
return std::make_pair(LH, Zero);
8190+
assert(Shift - HBitWidth < HBitWidth &&
8191+
"We shouldn't generate an undefined shift");
8192+
SDValue ShAmt = DAG.getConstant(Shift - HBitWidth, DL, HiLoVT);
8193+
return std::make_pair(DAG.getNode(ISD::SRL, DL, HiLoVT, LH, ShAmt), Zero);
8194+
};
8195+
8196+
// Knowledge of leading zeros may help to reduce the multiplier.
8197+
unsigned KnownLeadingZeros = DAG.computeKnownBits(N0).countMinLeadingZeros();
8198+
8199+
UnsignedDivisionByConstantInfo Magics = UnsignedDivisionByConstantInfo::get(
8200+
Divisor, std::min(KnownLeadingZeros, Divisor.countl_zero()));
8201+
8202+
assert(!LL == !LH && "Expected both input halves or no input halves!");
8203+
if (!LL)
8204+
std::tie(LL, LH) = DAG.SplitScalar(N0, DL, HiLoVT, HiLoVT);
8205+
SDValue QL = LL;
8206+
SDValue QH = LH;
8207+
if (Magics.PreShift != 0)
8208+
std::tie(QL, QH) = MakeSRLLong(QL, QH, Magics.PreShift);
8209+
8210+
SmallVector<SDValue, 2> UMulResult;
8211+
if (!MakeMUL_LOHIByConst(ISD::UMUL_LOHI, QL, QH, Magics.Magic, UMulResult))
8212+
return false;
8213+
8214+
QL = UMulResult[2];
8215+
QH = UMulResult[3];
8216+
8217+
if (Magics.IsAdd) {
8218+
auto [NPQL, NPQH] = MakeAddSubLong(ISD::SUB, LL, LH, QL, QH);
8219+
std::tie(NPQL, NPQH) = MakeSRLLong(NPQL, NPQH, 1);
8220+
std::tie(QL, QH) = MakeAddSubLong(ISD::ADD, NPQL, NPQH, QL, QH);
8221+
}
8222+
8223+
if (Magics.PostShift != 0)
8224+
std::tie(QL, QH) = MakeSRLLong(QL, QH, Magics.PostShift);
8225+
8226+
unsigned Opcode = N->getOpcode();
8227+
if (Opcode != ISD::UREM) {
8228+
Result.push_back(QL);
8229+
Result.push_back(QH);
8230+
}
8231+
8232+
if (Opcode != ISD::UDIV) {
8233+
SmallVector<SDValue, 2> MulResult;
8234+
if (!MakeMUL_LOHIByConst(ISD::MUL, QL, QH, Divisor, MulResult))
8235+
return false;
8236+
8237+
assert(MulResult.size() == 2);
8238+
8239+
auto [RemL, RemH] =
8240+
MakeAddSubLong(ISD::SUB, LL, LH, MulResult[0], MulResult[1]);
8241+
8242+
Result.push_back(RemL);
8243+
Result.push_back(RemH);
8244+
}
8245+
8246+
return true;
8247+
}
8248+
81408249
bool TargetLowering::expandDIVREMByConstant(SDNode *N,
81418250
SmallVectorImpl<SDValue> &Result,
81428251
EVT HiLoVT, SelectionDAG &DAG,
@@ -8174,6 +8283,10 @@ bool TargetLowering::expandDIVREMByConstant(SDNode *N,
81748283
DAG, LL, LH, *this))
81758284
return true;
81768285

8286+
if (expandUDIVREMByConstantViaUMulHiMagic(N, Divisor, Result, HiLoVT, DAG, LL,
8287+
LH, *this))
8288+
return true;
8289+
81778290
return false;
81788291
}
81798292

0 commit comments

Comments
 (0)