@@ -175,8 +175,12 @@ struct SplitCacheDirected final : public SplitStrategy {
   void fragment(const BlockIt Start, const BlockIt End) override {
     BasicBlockOrder BlockOrder(Start, End);
     BinaryFunction &BF = *BlockOrder.front()->getFunction();
+    // No need to re-split small functions.
+    if (BlockOrder.size() <= 2)
+      return;

     size_t BestSplitIndex = findSplitIndex(BF, BlockOrder);
+    assert(BestSplitIndex < BlockOrder.size());

     // Assign fragments based on the computed best split index.
     // All basic blocks with index up to the best split index become hot.
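The new early exit keeps `fragment` from re-splitting functions with at most two blocks, and the assertion pins down `findSplitIndex`'s contract. The assignment rule that follows (blocks up to the best split index stay hot, later non-cold blocks become warm) can be sketched standalone; the `Fragment` enum and `assignFragments` helper below are simplified stand-ins for BOLT's `FragmentNum` machinery, not its actual API:

```cpp
#include <cassert>
#include <cstddef>
#include <vector>

enum class Fragment { Main, Warm, Cold }; // stand-in for BOLT's FragmentNum

// Blocks with index <= BestSplitIndex stay in the hot (main) fragment; the
// remaining non-cold blocks move to warm. Cold blocks are left untouched.
void assignFragments(std::vector<Fragment> &Blocks, size_t BestSplitIndex) {
  assert(BestSplitIndex < Blocks.size());
  for (size_t Index = 0; Index < Blocks.size(); ++Index) {
    if (Blocks[Index] == Fragment::Cold)
      continue;
    Blocks[Index] = Index <= BestSplitIndex ? Fragment::Main : Fragment::Warm;
  }
}
```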
@@ -200,10 +204,12 @@ struct SplitCacheDirected final : public SplitStrategy {
   };

   struct SplitScore {
-    size_t SplitIndex;
+    size_t SplitIndex = size_t(-1);
     size_t HotSizeReduction = 0;
     double LocalScore = 0;
     double CoverCallScore = 0;
+
+    double sum() const { return LocalScore + CoverCallScore; }
   };

   // Auxiliary variables used by the algorithm.
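Defaulting `SplitIndex` to `size_t(-1)` turns a default-constructed `SplitScore` into a "no candidate yet" sentinel, and `sum()` gives the single number candidates are compared on. A minimal sketch of the comparison this enables (the `pickBest` helper is illustrative only, not part of the patch):

```cpp
#include <cstddef>
#include <vector>

struct SplitScore {
  size_t SplitIndex = size_t(-1); // sentinel: no split candidate chosen yet
  size_t HotSizeReduction = 0;
  double LocalScore = 0;
  double CoverCallScore = 0;
  double sum() const { return LocalScore + CoverCallScore; }
};

// A default-constructed best score has sum() == 0, so any candidate with a
// positive combined score replaces it; no separate BestScoreSum is needed.
SplitScore pickBest(const std::vector<SplitScore> &Candidates) {
  SplitScore Best;
  for (const SplitScore &Score : Candidates)
    if (Score.sum() > Best.sum())
      Best = Score;
  return Best;
}
```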
@@ -303,7 +309,7 @@ struct SplitCacheDirected final : public SplitStrategy {
                                     const size_t SplitIndex) {
     assert(SplitIndex < BlockOrder.size() && "Invalid split index");

-    // Update function layout assuming hot-warm splitting at SplitIndex
+    // Update function layout assuming hot-warm splitting at SplitIndex.
     for (size_t Index = 0; Index < BlockOrder.size(); Index++) {
       BinaryBasicBlock *BB = BlockOrder[Index];
       if (BB->getFragmentNum() == FragmentNum::cold())
@@ -319,8 +325,8 @@ struct SplitCacheDirected final : public SplitStrategy {
     // Populate BB.OutputAddressRange with estimated new start and end addresses
     // and compute the old end address of the hot section and the new end
     // address of the hot section.
-    size_t OldHotEndAddr;
-    size_t NewHotEndAddr;
+    size_t OldHotEndAddr{0};
+    size_t NewHotEndAddr{0};
     size_t CurrentAddr = BBOffsets[BlockOrder[0]];
     for (BinaryBasicBlock *BB : BlockOrder) {
       // We only care about new addresses of blocks in hot/warm.
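Brace-initializing `OldHotEndAddr` and `NewHotEndAddr` gives them a defined value even on paths where the loop never updates them. The walk itself lays hot/warm blocks out back to back starting from the first block's offset; a self-contained sketch of that estimation, with a simplified `Block` type standing in for `BinaryBasicBlock` and its `OutputAddressRange` (an assumption, not BOLT's types):

```cpp
#include <cstdint>
#include <utility>
#include <vector>

struct Block {
  uint64_t EstimatedSize = 0; // estimated code size in bytes after splitting
  bool IsCold = false;        // already assigned to the cold fragment
  std::pair<uint64_t, uint64_t> OutputAddressRange{0, 0}; // new [start, end)
};

// Walk the layout order and assign each non-cold block the next available
// address range; cold blocks keep whatever addresses they already have.
void estimateAddresses(std::vector<Block> &Order, uint64_t FirstBlockOffset) {
  uint64_t CurrentAddr = FirstBlockOffset;
  for (Block &B : Order) {
    if (B.IsCold)
      continue;
    B.OutputAddressRange = {CurrentAddr, CurrentAddr + B.EstimatedSize};
    CurrentAddr += B.EstimatedSize;
  }
}
```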
@@ -492,20 +498,15 @@ struct SplitCacheDirected final : public SplitStrategy {
   }

     /// Compute the split score of splitting a function at a given index.
-    /// The split score consists of local score and cover score. Cover call score
-    /// is expensive to compute. As a result, we pass in a \p ReferenceScore and
-    /// compute cover score only when the local score exceeds that in the
-    /// ReferenceScore or that the size reduction of the hot fragment is larger
-    /// than that achieved by the split index of the ReferenceScore. This function
-    /// returns \p Score of SplitScore type. It contains the local score and cover
-    /// score (if computed) of the current splitting index. For easier book
-    /// keeping and comparison, it also stores the split index and the resulting
-    /// reduction in hot fragment size.
+    /// The split score consists of local score and cover score. This function
+    /// returns \p Score of SplitScore type. It contains the local score and
+    /// cover score of the current splitting index. For easier bookkeeping and
+    /// comparison, it also stores the split index and the resulting reduction
+    /// in hot fragment size.
     SplitScore computeSplitScore(const BinaryFunction &BF,
                                  const BasicBlockOrder &BlockOrder,
                                  const size_t SplitIndex,
-                                 const std::vector<CallInfo> &CoverCalls,
-                                 const SplitScore &ReferenceScore) {
+                                 const std::vector<CallInfo> &CoverCalls) {
       // Populate BinaryBasicBlock::OutputAddressRange with estimated
       // new start and end addresses after hot-warm splitting at SplitIndex.
       size_t OldHotEnd;
@@ -533,47 +534,74 @@ struct SplitCacheDirected final : public SplitStrategy {
     // incremented in place.
     computeJumpScore(BlockOrder, SplitIndex, Score);

-    // There is no need to compute CoverCallScore if we have already found
-    // another split index with a bigger LocalScore and bigger HotSizeReduction.
-    if (Score.LocalScore <= ReferenceScore.LocalScore &&
-        Score.HotSizeReduction <= ReferenceScore.HotSizeReduction)
-      return Score;
-
     // Compute CoverCallScore and store in Score in place.
     computeCoverCallScore(BlockOrder, SplitIndex, CoverCalls, Score);
     return Score;
   }

+  /// Find the most likely successor of a basic block when it has one or two
+  /// successors. Return nullptr otherwise.
+  const BinaryBasicBlock *getMostLikelySuccessor(const BinaryBasicBlock *BB) {
+    if (BB->succ_size() == 1)
+      return BB->getSuccessor();
+    if (BB->succ_size() == 2) {
+      uint64_t TakenCount = BB->getTakenBranchInfo().Count;
+      assert(TakenCount != BinaryBasicBlock::COUNT_NO_PROFILE);
+      uint64_t NonTakenCount = BB->getFallthroughBranchInfo().Count;
+      assert(NonTakenCount != BinaryBasicBlock::COUNT_NO_PROFILE);
+      if (TakenCount > NonTakenCount)
+        return BB->getConditionalSuccessor(true);
+      else if (TakenCount < NonTakenCount)
+        return BB->getConditionalSuccessor(false);
+    }
+    return nullptr;
+  }
+
   /// Find the best index for splitting. The returned value is the index of the
   /// last hot basic block. Hence, "no splitting" is equivalent to returning the
   /// value which is one less than the size of the function.
   size_t findSplitIndex(const BinaryFunction &BF,
                         const BasicBlockOrder &BlockOrder) {
+    assert(BlockOrder.size() > 2);
     // Find all function calls that can be shortened if we move blocks of the
     // current function to warm/cold
     const std::vector<CallInfo> CoverCalls = extractCoverCalls(BF);

-    // Try all possible split indices (blocks with Index <= SplitIndex are in
-    // hot) and find the one maximizing the splitting score.
+    // Find the existing hot-cold splitting index.
+    size_t HotColdIndex = 0;
+    while (HotColdIndex + 1 < BlockOrder.size()) {
+      if (BlockOrder[HotColdIndex + 1]->getFragmentNum() == FragmentNum::cold())
+        break;
+      HotColdIndex++;
+    }
+    assert(HotColdIndex + 1 == BlockOrder.size() ||
+           (BlockOrder[HotColdIndex]->getFragmentNum() == FragmentNum::main() &&
+            BlockOrder[HotColdIndex + 1]->getFragmentNum() ==
+                FragmentNum::cold()));
+
+    // Try all possible split indices up to HotColdIndex (blocks that have
+    // Index <= SplitIndex are in hot) and find the one maximizing the
+    // splitting score.
     SplitScore BestScore;
-    double BestScoreSum = -1.0;
-    SplitScore ReferenceScore;
-    for (size_t Index = 0; Index < BlockOrder.size(); Index++) {
+    for (size_t Index = 0; Index <= HotColdIndex; Index++) {
       const BinaryBasicBlock *LastHotBB = BlockOrder[Index];
-      // No need to keep cold blocks in the hot section.
-      if (LastHotBB->getFragmentNum() == FragmentNum::cold())
-        break;
+      assert(LastHotBB->getFragmentNum() != FragmentNum::cold());
+
+      // Do not break the jump to the most likely successor.
+      if (Index + 1 < BlockOrder.size() &&
+          BlockOrder[Index + 1] == getMostLikelySuccessor(LastHotBB))
+        continue;
+
       const SplitScore Score =
-          computeSplitScore(BF, BlockOrder, Index, CoverCalls, ReferenceScore);
-      double ScoreSum = Score.LocalScore + Score.CoverCallScore;
-      if (ScoreSum > BestScoreSum) {
-        BestScoreSum = ScoreSum;
+          computeSplitScore(BF, BlockOrder, Index, CoverCalls);
+      if (Score.sum() > BestScore.sum())
         BestScore = Score;
-      }
-      if (Score.LocalScore > ReferenceScore.LocalScore)
-        ReferenceScore = Score;
     }

+    // If we don't find a good splitting point, fall back to the original one.
+    if (BestScore.SplitIndex == size_t(-1))
+      return HotColdIndex;
+
     return BestScore.SplitIndex;
   }
 };
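Taken together, `findSplitIndex` now searches only up to the pre-existing hot-cold boundary and refuses to place the split between a block and the successor it almost always reaches, since that would turn a hot fall-through into a cross-fragment jump. The successor choice can be sketched on a toy block type (the `ToyBlock` shape is an assumption; BOLT reads the same counts through `getTakenBranchInfo` and `getFallthroughBranchInfo`):

```cpp
#include <cstdint>

struct ToyBlock {
  int NumSuccessors = 0;
  const ToyBlock *Taken = nullptr;       // target of the conditional branch
  const ToyBlock *Fallthrough = nullptr; // fall-through successor
  uint64_t TakenCount = 0;               // profile count of the taken edge
  uint64_t FallthroughCount = 0;         // profile count of the fall-through
};

// Mirrors the patch's logic: a unique successor is trivially the most likely
// one; with two successors, the hotter edge wins and a tie yields nullptr.
const ToyBlock *mostLikelySuccessor(const ToyBlock &BB) {
  if (BB.NumSuccessors == 1)
    return BB.Fallthrough;
  if (BB.NumSuccessors == 2) {
    if (BB.TakenCount > BB.FallthroughCount)
      return BB.Taken;
    if (BB.TakenCount < BB.FallthroughCount)
      return BB.Fallthrough;
  }
  return nullptr;
}
```

Because a tie between the two edge counts yields `nullptr`, no candidate index is skipped on ambiguous profile data, matching the patch's `continue` guard in the search loop.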