@@ -120,6 +120,12 @@ static cl::opt<unsigned>
120120 cl::desc (" Maximum cost accepted for the transformation" ),
121121 cl::Hidden, cl::init(50 ));
122122
// Hidden command-line option (dfa-max-cloned-rate, default 7.5) that caps how
// much code the transformation may duplicate. The profitability check compares
// the ratio of cloned instructions to original instructions against this
// value and rejects the transformation when the ratio is larger.
123+ static cl::opt<double > MaxClonedRate (
124+ " dfa-max-cloned-rate" ,
125+ cl::desc (
126+ " Maximum cloned instructions rate accepted for the transformation" ),
127+ cl::Hidden, cl::init(7.5 ));
128+
123129namespace {
124130
125131class SelectInstToUnfold {
@@ -828,6 +834,7 @@ struct TransformDFA {
828834 /// also returns false if it is illegal to clone some required block.
829835 bool isLegalAndProfitableToTransform () {
830836 CodeMetrics Metrics;
837+ uint64_t NumClonedInst = 0 ;
831838 SwitchInst *Switch = SwitchPaths->getSwitchInst ();
832839
833840 // Don't thread switch without multiple successors.
@@ -837,7 +844,6 @@ struct TransformDFA {
837844 // Note that DuplicateBlockMap is not being used as intended here. It is
838845 // just being used to ensure (BB, State) pairs are only counted once.
839846 DuplicateBlockMap DuplicateMap;
840-
841847 for (ThreadingPath &TPath : SwitchPaths->getThreadingPaths ()) {
842848 PathType PathBBs = TPath.getPath ();
843849 APInt NextState = TPath.getExitValue ();
@@ -848,6 +854,7 @@ struct TransformDFA {
848854 BasicBlock *VisitedBB = getClonedBB (BB, NextState, DuplicateMap);
849855 if (!VisitedBB) {
850856 Metrics.analyzeBasicBlock (BB, *TTI, EphValues);
857+ NumClonedInst += BB->sizeWithoutDebug ();
851858 DuplicateMap[BB].push_back ({BB, NextState});
852859 }
853860
@@ -865,6 +872,7 @@ struct TransformDFA {
865872 if (VisitedBB)
866873 continue ;
867874 Metrics.analyzeBasicBlock (BB, *TTI, EphValues);
875+ NumClonedInst += BB->sizeWithoutDebug ();
868876 DuplicateMap[BB].push_back ({BB, NextState});
869877 }
870878
@@ -901,6 +909,22 @@ struct TransformDFA {
901909 }
902910 }
903911
912+ // Too many cloned instructions slow down later optimizations, especially
913+ // SLPVectorizer.
914+ // TODO: Thread the switch partially before reaching the threshold.
915+ uint64_t NumOrigInst = 0 ;
916+ for (auto *BB : DuplicateMap.keys ())
917+ NumOrigInst += BB->sizeWithoutDebug ();
918+ if (double (NumClonedInst) / double (NumOrigInst) > MaxClonedRate) {
919+ LLVM_DEBUG (dbgs () << " DFA Jump Threading: Not jump threading, too much "
920+ " instructions wll be cloned\n " );
921+ ORE->emit ([&]() {
922+ return OptimizationRemarkMissed (DEBUG_TYPE, " NotProfitable" , Switch)
923+ << " Too much instructions will be cloned." ;
924+ });
925+ return false ;
926+ }
927+
904928 InstructionCost DuplicationCost = 0 ;
905929
906930 unsigned JumpTableSize = 0 ;
0 commit comments