@@ -120,6 +120,12 @@ static cl::opt<unsigned>
120
120
cl::desc (" Maximum cost accepted for the transformation" ),
121
121
cl::Hidden, cl::init(50 ));
122
122
123
// Hidden command-line option (default 7.5) giving the upper bound on the
// ratio of cloned instructions to original instructions. The profitability
// check divides the number of instructions that would be cloned by the number
// of instructions in the original blocks and rejects the transformation when
// the quotient is greater than this value.
+ static cl::opt<double > MaxClonedRate (
124
+ " dfa-max-cloned-rate" ,
125
+ cl::desc (
126
+ " Maximum cloned instructions rate accepted for the transformation" ),
127
+ cl::Hidden, cl::init(7.5 ));
128
+
123
129
namespace {
124
130
125
131
class SelectInstToUnfold {
@@ -828,6 +834,7 @@ struct TransformDFA {
828
834
/// also returns false if it is illegal to clone some required block.
829
835
bool isLegalAndProfitableToTransform () {
830
836
CodeMetrics Metrics;
837
+ uint64_t NumClonedInst = 0 ;
831
838
SwitchInst *Switch = SwitchPaths->getSwitchInst ();
832
839
833
840
// Don't thread switch without multiple successors.
@@ -837,7 +844,6 @@ struct TransformDFA {
837
844
// Note that DuplicateBlockMap is not being used as intended here. It is
838
845
// just being used to ensure (BB, State) pairs are only counted once.
839
846
DuplicateBlockMap DuplicateMap;
840
-
841
847
for (ThreadingPath &TPath : SwitchPaths->getThreadingPaths ()) {
842
848
PathType PathBBs = TPath.getPath ();
843
849
APInt NextState = TPath.getExitValue ();
@@ -848,6 +854,7 @@ struct TransformDFA {
848
854
BasicBlock *VisitedBB = getClonedBB (BB, NextState, DuplicateMap);
849
855
if (!VisitedBB) {
850
856
Metrics.analyzeBasicBlock (BB, *TTI, EphValues);
857
+ NumClonedInst += BB->sizeWithoutDebug ();
851
858
DuplicateMap[BB].push_back ({BB, NextState});
852
859
}
853
860
@@ -865,6 +872,7 @@ struct TransformDFA {
865
872
if (VisitedBB)
866
873
continue ;
867
874
Metrics.analyzeBasicBlock (BB, *TTI, EphValues);
875
+ NumClonedInst += BB->sizeWithoutDebug ();
868
876
DuplicateMap[BB].push_back ({BB, NextState});
869
877
}
870
878
@@ -901,6 +909,22 @@ struct TransformDFA {
901
909
}
902
910
}
903
911
912
+ // Too many cloned instructions slow down later optimizations, especially
913
+ // SLPVectorizer.
914
+ // TODO: Thread the switch partially before reaching the threshold.
915
+ uint64_t NumOrigInst = 0 ;
916
+ for (auto *BB : DuplicateMap.keys ())
917
+ NumOrigInst += BB->sizeWithoutDebug ();
918
+ if (double (NumClonedInst) / double (NumOrigInst) > MaxClonedRate) {
919
+ LLVM_DEBUG (dbgs () << " DFA Jump Threading: Not jump threading, too much "
920
+ " instructions wll be cloned\n " );
921
+ ORE->emit ([&]() {
922
+ return OptimizationRemarkMissed (DEBUG_TYPE, " NotProfitable" , Switch)
923
+ << " Too much instructions will be cloned." ;
924
+ });
925
+ return false ;
926
+ }
927
+
904
928
InstructionCost DuplicationCost = 0 ;
905
929
906
930
unsigned JumpTableSize = 0 ;
0 commit comments