Skip to content

Commit 531fd45

Browse files
scui-ibmShimin Cui
andauthored
[PPC] Set minimum of largest number of comparisons to use bit test for switch lowering (#155910)
Currently it is considered suitable to lower to a bit test for a set of switch case clusters when the number of unique destinations (`NumDests`) and the number of total comparisons (`NumCmps`) satisfy: `(NumDests == 1 && NumCmps >= 3) || (NumDests == 2 && NumCmps >= 5) || (NumDests == 3 && NumCmps >= 6)`. However, in some cases on PowerPC (for example, when NumDests is 3 and the number of comparisons for each destination is 2), it is not profitable to lower the switch to a bit test. This patch adds an option to set the minimum of the largest per-destination number of comparisons required to use a bit test for switch lowering. --------- Co-authored-by: Shimin Cui <[email protected]>
1 parent 0621fd0 commit 531fd45

File tree

6 files changed

+260
-19
lines changed

6 files changed

+260
-19
lines changed

llvm/include/llvm/CodeGen/BasicTTIImpl.h

Lines changed: 6 additions & 5 deletions
Original file line numberDiff line numberDiff line change
@@ -594,12 +594,13 @@ class BasicTTIImplBase : public TargetTransformInfoImplCRTPBase<T> {
594594

595595
// Check if suitable for a bit test
596596
if (N <= DL.getIndexSizeInBits(0u)) {
597-
SmallPtrSet<const BasicBlock *, 4> Dests;
598-
for (auto I : SI.cases())
599-
Dests.insert(I.getCaseSuccessor());
597+
DenseMap<const BasicBlock *, unsigned int> DestMap;
598+
for (auto I : SI.cases()) {
599+
const BasicBlock *BB = I.getCaseSuccessor();
600+
++DestMap[BB];
601+
}
600602

601-
if (TLI->isSuitableForBitTests(Dests.size(), N, MinCaseVal, MaxCaseVal,
602-
DL))
603+
if (TLI->isSuitableForBitTests(DestMap, MinCaseVal, MaxCaseVal, DL))
603604
return 1;
604605
}
605606

llvm/include/llvm/CodeGen/TargetLowering.h

Lines changed: 26 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -1433,9 +1433,9 @@ class LLVM_ABI TargetLoweringBase {
14331433
/// \p High as its lowest and highest case values, and expects \p NumCmps
14341434
/// case value comparisons. Check if the number of destinations, comparison
14351435
/// metric, and range are all suitable.
1436-
bool isSuitableForBitTests(unsigned NumDests, unsigned NumCmps,
1437-
const APInt &Low, const APInt &High,
1438-
const DataLayout &DL) const {
1436+
bool isSuitableForBitTests(
1437+
const DenseMap<const BasicBlock *, unsigned int> &DestCmps,
1438+
const APInt &Low, const APInt &High, const DataLayout &DL) const {
14391439
// FIXME: I don't think NumCmps is the correct metric: a single case and a
14401440
// range of cases both require only one branch to lower. Just looking at the
14411441
// number of clusters and destinations should be enough to decide whether to
@@ -1446,6 +1446,20 @@ class LLVM_ABI TargetLoweringBase {
14461446
if (!rangeFitsInWord(Low, High, DL))
14471447
return false;
14481448

1449+
unsigned NumDests = DestCmps.size();
1450+
unsigned NumCmps = 0;
1451+
unsigned int MaxBitTestEntry = 0;
1452+
for (auto &DestCmp : DestCmps) {
1453+
NumCmps += DestCmp.second;
1454+
if (DestCmp.second > MaxBitTestEntry)
1455+
MaxBitTestEntry = DestCmp.second;
1456+
}
1457+
1458+
// Comparisons might be cheaper for small number of comparisons, which can
1459+
// be Arch Target specific.
1460+
if (MaxBitTestEntry < getMinimumBitTestCmps())
1461+
return false;
1462+
14491463
// Decide whether it's profitable to lower this range with bit tests. Each
14501464
// destination requires a bit test and branch, and there is an overall range
14511465
// check branch. For a small number of clusters, separate comparisons might
@@ -2055,6 +2069,9 @@ class LLVM_ABI TargetLoweringBase {
20552069

20562070
virtual bool isJumpTableRelative() const;
20572071

2072+
/// Return the minimum of largest number of comparisons in BitTest.
2073+
unsigned getMinimumBitTestCmps() const;
2074+
20582075
/// If a physical register, this specifies the register that
20592076
/// llvm.savestack/llvm.restorestack should save and restore.
20602077
Register getStackPointerRegisterToSaveRestore() const {
@@ -2577,6 +2594,9 @@ class LLVM_ABI TargetLoweringBase {
25772594
/// Set to zero to generate unlimited jump tables.
25782595
void setMaximumJumpTableSize(unsigned);
25792596

2597+
/// Set the minimum of largest number of comparisons to generate BitTest.
2598+
void setMinimumBitTestCmps(unsigned Val);
2599+
25802600
/// If set to a physical register, this specifies the register that
25812601
/// llvm.savestack/llvm.restorestack should save and restore.
25822602
void setStackPointerRegisterToSaveRestore(Register R) {
@@ -3719,6 +3739,9 @@ class LLVM_ABI TargetLoweringBase {
37193739
/// backend supports.
37203740
unsigned MinCmpXchgSizeInBits;
37213741

3742+
/// The minimum of largest number of comparisons to use bit test for switch.
3743+
unsigned MinimumBitTestCmps;
3744+
37223745
/// This indicates if the target supports unaligned atomic operations.
37233746
bool SupportsUnalignedAtomics;
37243747

llvm/lib/CodeGen/SwitchLoweringUtils.cpp

Lines changed: 11 additions & 11 deletions
Original file line numberDiff line numberDiff line change
@@ -198,20 +198,23 @@ bool SwitchCG::SwitchLowering::buildJumpTable(const CaseClusterVector &Clusters,
198198
assert(First <= Last);
199199

200200
auto Prob = BranchProbability::getZero();
201-
unsigned NumCmps = 0;
202201
std::vector<MachineBasicBlock*> Table;
203202
DenseMap<MachineBasicBlock*, BranchProbability> JTProbs;
204203

205204
// Initialize probabilities in JTProbs.
206205
for (unsigned I = First; I <= Last; ++I)
207206
JTProbs[Clusters[I].MBB] = BranchProbability::getZero();
208207

208+
DenseMap<const BasicBlock *, unsigned int> DestMap;
209209
for (unsigned I = First; I <= Last; ++I) {
210210
assert(Clusters[I].Kind == CC_Range);
211211
Prob += Clusters[I].Prob;
212212
const APInt &Low = Clusters[I].Low->getValue();
213213
const APInt &High = Clusters[I].High->getValue();
214-
NumCmps += (Low == High) ? 1 : 2;
214+
unsigned int NumCmp = (Low == High) ? 1 : 2;
215+
const BasicBlock *BB = Clusters[I].MBB->getBasicBlock();
216+
DestMap[BB] += NumCmp;
217+
215218
if (I != First) {
216219
// Fill the gap between this and the previous cluster.
217220
const APInt &PreviousHigh = Clusters[I - 1].High->getValue();
@@ -226,9 +229,7 @@ bool SwitchCG::SwitchLowering::buildJumpTable(const CaseClusterVector &Clusters,
226229
JTProbs[Clusters[I].MBB] += Clusters[I].Prob;
227230
}
228231

229-
unsigned NumDests = JTProbs.size();
230-
if (TLI->isSuitableForBitTests(NumDests, NumCmps,
231-
Clusters[First].Low->getValue(),
232+
if (TLI->isSuitableForBitTests(DestMap, Clusters[First].Low->getValue(),
232233
Clusters[Last].High->getValue(), *DL)) {
233234
// Clusters[First..Last] should be lowered as bit tests instead.
234235
return false;
@@ -372,20 +373,19 @@ bool SwitchCG::SwitchLowering::buildBitTests(CaseClusterVector &Clusters,
372373
if (First == Last)
373374
return false;
374375

375-
BitVector Dests(FuncInfo.MF->getNumBlockIDs());
376-
unsigned NumCmps = 0;
376+
DenseMap<const BasicBlock *, unsigned int> DestMap;
377377
for (int64_t I = First; I <= Last; ++I) {
378378
assert(Clusters[I].Kind == CC_Range);
379-
Dests.set(Clusters[I].MBB->getNumber());
380-
NumCmps += (Clusters[I].Low == Clusters[I].High) ? 1 : 2;
379+
unsigned NumCmp = (Clusters[I].Low == Clusters[I].High) ? 1 : 2;
380+
const BasicBlock *BB = Clusters[I].MBB->getBasicBlock();
381+
DestMap[BB] += NumCmp;
381382
}
382-
unsigned NumDests = Dests.count();
383383

384384
APInt Low = Clusters[First].Low->getValue();
385385
APInt High = Clusters[Last].High->getValue();
386386
assert(Low.slt(High));
387387

388-
if (!TLI->isSuitableForBitTests(NumDests, NumCmps, Low, High, *DL))
388+
if (!TLI->isSuitableForBitTests(DestMap, Low, High, *DL))
389389
return false;
390390

391391
APInt LowBound;

llvm/lib/CodeGen/TargetLoweringBase.cpp

Lines changed: 16 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -11,6 +11,7 @@
1111
//===----------------------------------------------------------------------===//
1212

1313
#include "llvm/ADT/BitVector.h"
14+
#include "llvm/ADT/DenseMap.h"
1415
#include "llvm/ADT/STLExtras.h"
1516
#include "llvm/ADT/SmallVector.h"
1617
#include "llvm/ADT/StringExtras.h"
@@ -90,6 +91,11 @@ static cl::opt<unsigned> OptsizeJumpTableDensity(
9091
cl::desc("Minimum density for building a jump table in "
9192
"an optsize function"));
9293

94+
static cl::opt<unsigned> MinimumBitTestCmpsOverride(
95+
"min-bit-test-cmps", cl::init(2), cl::Hidden,
96+
cl::desc("Set minimum of largest number of comparisons "
97+
"to use bit test for switch."));
98+
9399
// FIXME: This option is only to test if the strict fp operation processed
94100
// correctly by preventing mutating strict fp operation to normal fp operation
95101
// during development. When the backend supports strict float operation, this
@@ -719,6 +725,8 @@ TargetLoweringBase::TargetLoweringBase(const TargetMachine &tm)
719725

720726
MinCmpXchgSizeInBits = 0;
721727
SupportsUnalignedAtomics = false;
728+
729+
MinimumBitTestCmps = MinimumBitTestCmpsOverride;
722730
}
723731

724732
// Define the virtual destructor out-of-line to act as a key method to anchor
@@ -2129,6 +2137,14 @@ bool TargetLoweringBase::isJumpTableRelative() const {
21292137
return getTargetMachine().isPositionIndependent();
21302138
}
21312139

2140+
unsigned TargetLoweringBase::getMinimumBitTestCmps() const {
2141+
return MinimumBitTestCmps;
2142+
}
2143+
2144+
void TargetLoweringBase::setMinimumBitTestCmps(unsigned Val) {
2145+
MinimumBitTestCmps = Val;
2146+
}
2147+
21322148
Align TargetLoweringBase::getPrefLoopAlignment(MachineLoop *ML) const {
21332149
if (TM.Options.LoopAlignment)
21342150
return Align(TM.Options.LoopAlignment);

llvm/lib/Target/PowerPC/PPCISelLowering.cpp

Lines changed: 8 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -138,6 +138,11 @@ static cl::opt<unsigned> PPCMinimumJumpTableEntries(
138138
"ppc-min-jump-table-entries", cl::init(64), cl::Hidden,
139139
cl::desc("Set minimum number of entries to use a jump table on PPC"));
140140

141+
static cl::opt<unsigned> PPCMinimumBitTestCmps(
142+
"ppc-min-bit-test-cmps", cl::init(3), cl::Hidden,
143+
cl::desc("Set minimum of largest number of comparisons to use bit test for "
144+
"switch on PPC."));
145+
141146
static cl::opt<unsigned> PPCGatherAllAliasesMaxDepth(
142147
"ppc-gather-alias-max-depth", cl::init(18), cl::Hidden,
143148
cl::desc("max depth when checking alias info in GatherAllAliases()"));
@@ -1436,6 +1441,9 @@ PPCTargetLowering::PPCTargetLowering(const PPCTargetMachine &TM,
14361441
// Re-evaluate this value on future HWs that can do better with mtctr.
14371442
setMinimumJumpTableEntries(PPCMinimumJumpTableEntries);
14381443

1444+
// The default minimum of largest number of comparisons in a BitTest cluster is 3.
1445+
setMinimumBitTestCmps(PPCMinimumBitTestCmps);
1446+
14391447
setMinFunctionAlignment(Align(4));
14401448
setMinCmpXchgSizeInBits(Subtarget.hasPartwordAtomics() ? 8 : 32);
14411449

0 commit comments

Comments
 (0)