Skip to content

Commit cf85ec5

Browse files
authored
[DFAJumpThreading] Constrain the number of cloned instructions (#161632)
Duplicating blocks of threaded paths may cause a significant regression in IR size and slow down compile-time in later optimizations. This patch adds a coarse constraint on the number of duplicated instructions.
1 parent 9cbcc87 commit cf85ec5

File tree

1 file changed

+25
-1
lines changed

1 file changed

+25
-1
lines changed

llvm/lib/Transforms/Scalar/DFAJumpThreading.cpp

Lines changed: 25 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -120,6 +120,12 @@ static cl::opt<unsigned>
120120
cl::desc("Maximum cost accepted for the transformation"),
121121
cl::Hidden, cl::init(50));
122122

123+
static cl::opt<double> MaxClonedRate(
124+
"dfa-max-cloned-rate",
125+
cl::desc(
126+
"Maximum cloned instructions rate accepted for the transformation"),
127+
cl::Hidden, cl::init(7.5));
128+
123129
namespace {
124130

125131
class SelectInstToUnfold {
@@ -828,6 +834,7 @@ struct TransformDFA {
828834
/// also returns false if it is illegal to clone some required block.
829835
bool isLegalAndProfitableToTransform() {
830836
CodeMetrics Metrics;
837+
uint64_t NumClonedInst = 0;
831838
SwitchInst *Switch = SwitchPaths->getSwitchInst();
832839

833840
// Don't thread switch without multiple successors.
@@ -837,7 +844,6 @@ struct TransformDFA {
837844
// Note that DuplicateBlockMap is not being used as intended here. It is
838845
// just being used to ensure (BB, State) pairs are only counted once.
839846
DuplicateBlockMap DuplicateMap;
840-
841847
for (ThreadingPath &TPath : SwitchPaths->getThreadingPaths()) {
842848
PathType PathBBs = TPath.getPath();
843849
APInt NextState = TPath.getExitValue();
@@ -848,6 +854,7 @@ struct TransformDFA {
848854
BasicBlock *VisitedBB = getClonedBB(BB, NextState, DuplicateMap);
849855
if (!VisitedBB) {
850856
Metrics.analyzeBasicBlock(BB, *TTI, EphValues);
857+
NumClonedInst += BB->sizeWithoutDebug();
851858
DuplicateMap[BB].push_back({BB, NextState});
852859
}
853860

@@ -865,6 +872,7 @@ struct TransformDFA {
865872
if (VisitedBB)
866873
continue;
867874
Metrics.analyzeBasicBlock(BB, *TTI, EphValues);
875+
NumClonedInst += BB->sizeWithoutDebug();
868876
DuplicateMap[BB].push_back({BB, NextState});
869877
}
870878

@@ -901,6 +909,22 @@ struct TransformDFA {
901909
}
902910
}
903911

912+
// Too many cloned instructions slow down later optimizations, especially
913+
// SLPVectorizer.
914+
// TODO: Thread the switch partially before reaching the threshold.
915+
uint64_t NumOrigInst = 0;
916+
for (auto *BB : DuplicateMap.keys())
917+
NumOrigInst += BB->sizeWithoutDebug();
918+
if (double(NumClonedInst) / double(NumOrigInst) > MaxClonedRate) {
919+
LLVM_DEBUG(dbgs() << "DFA Jump Threading: Not jump threading, too much "
920+
"instructions wll be cloned\n");
921+
ORE->emit([&]() {
922+
return OptimizationRemarkMissed(DEBUG_TYPE, "NotProfitable", Switch)
923+
<< "Too much instructions will be cloned.";
924+
});
925+
return false;
926+
}
927+
904928
InstructionCost DuplicationCost = 0;
905929

906930
unsigned JumpTableSize = 0;

0 commit comments

Comments
 (0)