@@ -2787,6 +2787,188 @@ CheckIfCondBranchesShareCommonDestination(BranchInst *BI, BranchInst *PBI) {
2787
2787
return None;
2788
2788
}
2789
2789
2790
/// Fold the conditional branch BI (in its parent block BB) into the conditional
/// branch PBI of the predecessor block PredBlock.
///
/// Steps, in order:
///  - optionally invert PBI's condition and swap its successors, as requested
///    by the result of CheckIfCondBranchesShareCommonDestination;
///  - register PredBlock as a new predecessor of UniqueSucc;
///  - clone every instruction of BB that is neither a debug intrinsic nor a
///    branch in front of PBI, rewriting selected uses of the originals;
///  - combine PBI's condition with the cloned BI condition using Opc;
///  - recompute PBI's branch weights, or clear its !prof metadata when the
///    weights cannot be extracted;
///  - retarget PBI's edge to BB at UniqueSucc, update DTU when it is non-null,
///    copy BI's !llvm.loop metadata to PBI, and clone BB's debug intrinsics in
///    front of PBI.
///
/// The result of CheckIfCondBranchesShareCommonDestination(BI, PBI) is
/// dereferenced without a check, so the caller must already know that the two
/// branches share a common destination. MSSAU is only forwarded to
/// AddPredecessorToBlock. Increments NumFoldBranchToCommonDest and always
/// returns true.
+ static bool PerformBranchToCommonDestFolding (BranchInst *BI, BranchInst *PBI,
2791
+ DomTreeUpdater *DTU,
2792
+ MemorySSAUpdater *MSSAU) {
2793
+ BasicBlock *BB = BI->getParent ();
2794
+ BasicBlock *PredBlock = PBI->getParent ();
2795
+
2796
+ // Determine if the two branches share a common destination.
2797
+ Instruction::BinaryOps Opc;
2798
+ bool InvertPredCond;
2799
+ std::tie (Opc, InvertPredCond) =
2800
+ *CheckIfCondBranchesShareCommonDestination (BI, PBI);
2801
+
2802
+ LLVM_DEBUG (dbgs () << " FOLDING BRANCH TO COMMON DEST:\n " << *PBI << *BB);
2803
+
2804
+ IRBuilder<> Builder (PBI);
2805
+ // The builder is used to create instructions to eliminate the branch in BB.
2806
+ // If BB's terminator has !annotation metadata, add it to the new
2807
+ // instructions.
2808
+ Builder.CollectMetadataToCopy (BB->getTerminator (),
2809
+ {LLVMContext::MD_annotation});
2810
+
2811
+ // If we need to invert the condition in the pred block to match, do so now.
2812
+ if (InvertPredCond) {
2813
+ Value *NewCond = PBI->getCondition ();
2814
+ if (NewCond->hasOneUse () && isa<CmpInst>(NewCond)) {
2815
+ CmpInst *CI = cast<CmpInst>(NewCond);
2816
+ CI->setPredicate (CI->getInversePredicate ());
2817
+ } else {
2818
+ NewCond =
2819
+ Builder.CreateNot (NewCond, PBI->getCondition ()->getName () + " .not" );
2820
+ }
2821
+
2822
+ PBI->setCondition (NewCond);
2823
+ PBI->swapSuccessors ();
2824
+ }
2825
+
2826
+ BasicBlock *UniqueSucc =
2827
+ PBI->getSuccessor (0 ) == BB ? BI->getSuccessor (0 ) : BI->getSuccessor (1 );
2828
+
2829
+ // Before cloning instructions, notify the successor basic block that it
2830
+ // is about to have a new predecessor. This will update PHI nodes,
2831
+ // which will allow us to update live-out uses of bonus instructions.
2832
+ AddPredecessorToBlock (UniqueSucc, PredBlock, BB, MSSAU);
2833
+
2834
+ // If we have bonus instructions, clone them into the predecessor block.
2835
+ // Note that there may be multiple predecessor blocks, so we cannot move
2836
+ // bonus instructions to a predecessor block.
2837
+ ValueToValueMapTy VMap; // maps original values to cloned values
2838
+ for (Instruction &BonusInst : *BB) {
2839
+ if (isa<DbgInfoIntrinsic>(BonusInst) || isa<BranchInst>(BonusInst))
2840
+ continue ;
2841
+
2842
+ Instruction *NewBonusInst = BonusInst.clone ();
2843
+
2844
+ if (PBI->getDebugLoc () != NewBonusInst->getDebugLoc ()) {
2845
+ // Unless the instruction has the same !dbg location as the original
2846
+ // branch, drop it. When we fold the bonus instructions we want to make
2847
+ // sure we reset their debug locations in order to avoid stepping on
2848
+ // dead code caused by folding dead branches.
2849
+ NewBonusInst->setDebugLoc (DebugLoc ());
2850
+ }
2851
+
2852
+ RemapInstruction (NewBonusInst, VMap,
2853
+ RF_NoModuleLevelChanges | RF_IgnoreMissingLocals);
2854
+ VMap[&BonusInst] = NewBonusInst;
2855
+
2856
+ // If we moved a load, we cannot any longer claim any knowledge about
2857
+ // its potential value. The previous information might have been valid
2858
+ // only given the branch precondition.
2859
+ // For an analogous reason, we must also drop all the metadata whose
2860
+ // semantics we don't understand. We *can* preserve !annotation, because
2861
+ // it is tied to the instruction itself, not the value or position.
2862
+ NewBonusInst->dropUnknownNonDebugMetadata (LLVMContext::MD_annotation);
2863
+
2864
+ PredBlock->getInstList ().insert (PBI->getIterator (), NewBonusInst);
2865
+ NewBonusInst->takeName (&BonusInst);
2866
+ BonusInst.setName (BonusInst.getName () + " .old" );
2867
+ BonusInst.replaceUsesWithIf (
2868
+ NewBonusInst, [BB, BI, UniqueSucc, PredBlock](Use &U) {
2869
+ auto *User = cast<Instruction>(U.getUser ());
2870
+ // Ignore non-external uses of bonus instructions.
2871
+ if (User->getParent () == BB) {
2872
+ assert (!isa<PHINode>(User) &&
2873
+ " Non-external users are never PHI instructions." );
2874
+ return false ;
2875
+ }
2876
+ if (User->getParent () == PredBlock) {
2877
+ // The "external" use is in the block into which we just cloned the
2878
+ // bonus instruction. This means two things: 1. we are in an
2879
+ // unreachable block 2. the instruction is self-referencing.
2880
+ // So let's just rewrite it...
2881
+ return true ;
2882
+ }
2883
+ (void )BI;
2884
+ assert (isa<PHINode>(User) && " All external users must be PHI's." );
2885
+ auto *PN = cast<PHINode>(User);
2886
+ assert (is_contained (successors (BB), User->getParent ()) &&
2887
+ " All external users must be in successors of BB." );
2888
+ assert ((PN->getIncomingBlock (U) == BB ||
2889
+ PN->getIncomingBlock (U) == PredBlock) &&
2890
+ " The incoming block for that incoming value external use "
2891
+ " must be either the original block with bonus instructions, "
2892
+ " or the new predecessor block." );
2893
+ // UniqueSucc is the block whose predecessors we change,
2894
+ // so it is the only block in which we'll need to update PHI nodes.
2895
+ if (User->getParent () != UniqueSucc)
2896
+ return false ;
2897
+ // Update the incoming value for the new predecessor.
2898
+ return PN->getIncomingBlock (U) == PredBlock;
2899
+ });
2900
+ }
2901
+
2902
+ // Now that the Cond was cloned into the predecessor basic block,
2903
+ // or/and the two conditions together.
// NOTE(review): the VMap lookup below presumes BI's condition was cloned by
// the loop above (i.e. it is an instruction in BB) -- confirm with the caller.
2904
+ Instruction *NewCond = cast<Instruction>(Builder.CreateBinOp (
2905
+ Opc, PBI->getCondition (), VMap[BI->getCondition ()], " or.cond" ));
2906
+ PBI->setCondition (NewCond);
2907
+
2908
+ uint64_t PredTrueWeight, PredFalseWeight, SuccTrueWeight, SuccFalseWeight;
2909
+ if (extractPredSuccWeights (PBI, BI, PredTrueWeight, PredFalseWeight,
2910
+ SuccTrueWeight, SuccFalseWeight)) {
2911
+ SmallVector<uint64_t , 8 > NewWeights;
2912
+
2913
+ if (PBI->getSuccessor (0 ) == BB) {
2914
+ // PBI: br i1 %x, BB, FalseDest
2915
+ // BI: br i1 %y, UniqueSucc, FalseDest
2916
+ // TrueWeight is TrueWeight for PBI * TrueWeight for BI.
2917
+ NewWeights.push_back (PredTrueWeight * SuccTrueWeight);
2918
+ // FalseWeight is FalseWeight for PBI * TotalWeight for BI +
2919
+ // TrueWeight for PBI * FalseWeight for BI.
2920
+ // We assume that total weights of a BranchInst can fit into 32 bits.
2921
+ // Therefore, we will not have overflow using 64-bit arithmetic.
2922
+ NewWeights.push_back (PredFalseWeight *
2923
+ (SuccFalseWeight + SuccTrueWeight) +
2924
+ PredTrueWeight * SuccFalseWeight);
2925
+ } else {
2926
+ // PBI: br i1 %x, TrueDest, BB
2927
+ // BI: br i1 %y, TrueDest, UniqueSucc
2928
+ // TrueWeight is TrueWeight for PBI * TotalWeight for BI +
2929
+ // FalseWeight for PBI * TrueWeight for BI.
2930
+ NewWeights.push_back (PredTrueWeight * (SuccFalseWeight + SuccTrueWeight) +
2931
+ PredFalseWeight * SuccTrueWeight);
2932
+ // FalseWeight is FalseWeight for PBI * FalseWeight for BI.
2933
+ NewWeights.push_back (PredFalseWeight * SuccFalseWeight);
2934
+ }
2935
+
2936
+ // Halve the weights if any of them cannot fit in an uint32_t
2937
+ FitWeights (NewWeights);
2938
+
2939
+ SmallVector<uint32_t , 8 > MDWeights (NewWeights.begin (), NewWeights.end ());
2940
+ setBranchWeights (PBI, MDWeights[0 ], MDWeights[1 ]);
2941
+
2942
+ // TODO: If BB is reachable from all paths through PredBlock, then we
2943
+ // could replace PBI's branch probabilities with BI's.
2944
+ } else
2945
+ PBI->setMetadata (LLVMContext::MD_prof, nullptr );
2946
+
2947
+ PBI->setSuccessor (PBI->getSuccessor (0 ) != BB, UniqueSucc);
2948
+
2949
+ if (DTU)
2950
+ DTU->applyUpdates ({{DominatorTree::Insert, PredBlock, UniqueSucc},
2951
+ {DominatorTree::Delete, PredBlock, BB}});
2952
+
2953
+ // If BI was a loop latch, it may have had associated loop metadata.
2954
+ // We need to copy it to the new latch, that is, PBI.
2955
+ if (MDNode *LoopMD = BI->getMetadata (LLVMContext::MD_loop))
2956
+ PBI->setMetadata (LLVMContext::MD_loop, LoopMD);
2957
+
2958
+ // Copy any debug value intrinsics into the end of PredBlock.
2959
+ for (Instruction &I : *BB) {
2960
+ if (isa<DbgInfoIntrinsic>(I)) {
2961
+ Instruction *NewI = I.clone ();
2962
+ RemapInstruction (NewI, VMap,
2963
+ RF_NoModuleLevelChanges | RF_IgnoreMissingLocals);
2964
+ NewI->insertBefore (PBI);
2965
+ }
2966
+ }
2967
+
2968
+ ++NumFoldBranchToCommonDest;
2969
+ return true ;
2970
+ }
2971
+
2790
2972
// / If this basic block is simple enough, and if a predecessor branches to us
2791
2973
// / and one of our successors, fold the block into the predecessor and use
2792
2974
// / logical operations to pick the right destination.
@@ -2805,11 +2987,6 @@ bool llvm::FoldBranchToCommonDest(BranchInst *BI, DomTreeUpdater *DTU,
2805
2987
2806
2988
bool Changed = false ;
2807
2989
2808
- auto _ = make_scope_exit ([&]() {
2809
- if (Changed)
2810
- ++NumFoldBranchToCommonDest;
2811
- });
2812
-
2813
2990
TargetTransformInfo::TargetCostKind CostKind =
2814
2991
BB->getParent ()->hasMinSize () ? TargetTransformInfo::TCK_CodeSize
2815
2992
: TargetTransformInfo::TCK_SizeAndLatency;
@@ -2872,9 +3049,7 @@ bool llvm::FoldBranchToCommonDest(BranchInst *BI, DomTreeUpdater *DTU,
2872
3049
return Changed;
2873
3050
2874
3051
// Finally, don't infinitely unroll conditional loops.
2875
- BasicBlock *TrueDest = BI->getSuccessor (0 );
2876
- BasicBlock *FalseDest = BI->getSuccessor (1 );
2877
- if (TrueDest == BB || FalseDest == BB)
3052
+ if (is_contained (successors (BB), BB))
2878
3053
return Changed;
2879
3054
2880
3055
for (pred_iterator PI = pred_begin (BB), E = pred_end (BB); PI != E; ++PI) {
@@ -2909,174 +3084,7 @@ bool llvm::FoldBranchToCommonDest(BranchInst *BI, DomTreeUpdater *DTU,
2909
3084
continue ;
2910
3085
}
2911
3086
2912
- LLVM_DEBUG (dbgs () << " FOLDING BRANCH TO COMMON DEST:\n " << *PBI << *BB);
2913
- Changed = true ;
2914
-
2915
- IRBuilder<> Builder (PBI);
2916
- // The builder is used to create instructions to eliminate the branch in BB.
2917
- // If BB's terminator has !annotation metadata, add it to the new
2918
- // instructions.
2919
- Builder.CollectMetadataToCopy (BB->getTerminator (),
2920
- {LLVMContext::MD_annotation});
2921
-
2922
- // If we need to invert the condition in the pred block to match, do so now.
2923
- if (InvertPredCond) {
2924
- Value *NewCond = PBI->getCondition ();
2925
- if (NewCond->hasOneUse () && isa<CmpInst>(NewCond)) {
2926
- CmpInst *CI = cast<CmpInst>(NewCond);
2927
- CI->setPredicate (CI->getInversePredicate ());
2928
- } else {
2929
- NewCond =
2930
- Builder.CreateNot (NewCond, PBI->getCondition ()->getName () + " .not" );
2931
- }
2932
-
2933
- PBI->setCondition (NewCond);
2934
- PBI->swapSuccessors ();
2935
- }
2936
-
2937
- BasicBlock *UniqueSucc = PBI->getSuccessor (0 ) == BB ? TrueDest : FalseDest;
2938
-
2939
- // Before cloning instructions, notify the successor basic block that it
2940
- // is about to have a new predecessor. This will update PHI nodes,
2941
- // which will allow us to update live-out uses of bonus instructions.
2942
- AddPredecessorToBlock (UniqueSucc, PredBlock, BB, MSSAU);
2943
-
2944
- // If we have bonus instructions, clone them into the predecessor block.
2945
- // Note that there may be multiple predecessor blocks, so we cannot move
2946
- // bonus instructions to a predecessor block.
2947
- ValueToValueMapTy VMap; // maps original values to cloned values
2948
- for (Instruction &BonusInst : *BB) {
2949
- if (isa<DbgInfoIntrinsic>(BonusInst) || isa<BranchInst>(BonusInst))
2950
- continue ;
2951
-
2952
- Instruction *NewBonusInst = BonusInst.clone ();
2953
-
2954
- if (PBI->getDebugLoc () != NewBonusInst->getDebugLoc ()) {
2955
- // Unless the instruction has the same !dbg location as the original
2956
- // branch, drop it. When we fold the bonus instructions we want to make
2957
- // sure we reset their debug locations in order to avoid stepping on
2958
- // dead code caused by folding dead branches.
2959
- NewBonusInst->setDebugLoc (DebugLoc ());
2960
- }
2961
-
2962
- RemapInstruction (NewBonusInst, VMap,
2963
- RF_NoModuleLevelChanges | RF_IgnoreMissingLocals);
2964
- VMap[&BonusInst] = NewBonusInst;
2965
-
2966
- // If we moved a load, we cannot any longer claim any knowledge about
2967
- // its potential value. The previous information might have been valid
2968
- // only given the branch precondition.
2969
- // For an analogous reason, we must also drop all the metadata whose
2970
- // semantics we don't understand. We *can* preserve !annotation, because
2971
- // it is tied to the instruction itself, not the value or position.
2972
- NewBonusInst->dropUnknownNonDebugMetadata (LLVMContext::MD_annotation);
2973
-
2974
- PredBlock->getInstList ().insert (PBI->getIterator (), NewBonusInst);
2975
- NewBonusInst->takeName (&BonusInst);
2976
- BonusInst.setName (BonusInst.getName () + " .old" );
2977
- BonusInst.replaceUsesWithIf (
2978
- NewBonusInst, [BB, BI, UniqueSucc, PredBlock](Use &U) {
2979
- auto *User = cast<Instruction>(U.getUser ());
2980
- // Ignore non-external uses of bonus instructions.
2981
- if (User->getParent () == BB) {
2982
- assert (!isa<PHINode>(User) &&
2983
- " Non-external users are never PHI instructions." );
2984
- return false ;
2985
- }
2986
- if (User->getParent () == PredBlock) {
2987
- // The "exteral" use is in the block into which we just cloned the
2988
- // bonus instruction. This means two things: 1. we are in an
2989
- // unreachable block 2. the instruction is self-referencing.
2990
- // So let's just rewrite it...
2991
- return true ;
2992
- }
2993
- (void )BI;
2994
- assert (isa<PHINode>(User) && " All external users must be PHI's." );
2995
- auto *PN = cast<PHINode>(User);
2996
- assert (is_contained (successors (BB), User->getParent ()) &&
2997
- " All external users must be in successors of BB." );
2998
- assert ((PN->getIncomingBlock (U) == BB ||
2999
- PN->getIncomingBlock (U) == PredBlock) &&
3000
- " The incoming block for that incoming value external use "
3001
- " must be either the original block with bonus instructions, "
3002
- " or the new predecessor block." );
3003
- // UniqueSucc is the block for which we change it's predecessors,
3004
- // so it is the only block in which we'll need to update PHI nodes.
3005
- if (User->getParent () != UniqueSucc)
3006
- return false ;
3007
- // Update the incoming value for the new predecessor.
3008
- return PN->getIncomingBlock (U) == PredBlock;
3009
- });
3010
- }
3011
-
3012
- // Now that the Cond was cloned into the predecessor basic block,
3013
- // or/and the two conditions together.
3014
- Instruction *NewCond = cast<Instruction>(Builder.CreateBinOp (
3015
- Opc, PBI->getCondition (), VMap[BI->getCondition ()], " or.cond" ));
3016
- PBI->setCondition (NewCond);
3017
-
3018
- uint64_t PredTrueWeight, PredFalseWeight, SuccTrueWeight, SuccFalseWeight;
3019
- if (extractPredSuccWeights (PBI, BI, PredTrueWeight, PredFalseWeight,
3020
- SuccTrueWeight, SuccFalseWeight)) {
3021
- SmallVector<uint64_t , 8 > NewWeights;
3022
-
3023
- if (PBI->getSuccessor (0 ) == BB) {
3024
- // PBI: br i1 %x, BB, FalseDest
3025
- // BI: br i1 %y, UniqueSucc, FalseDest
3026
- // TrueWeight is TrueWeight for PBI * TrueWeight for BI.
3027
- NewWeights.push_back (PredTrueWeight * SuccTrueWeight);
3028
- // FalseWeight is FalseWeight for PBI * TotalWeight for BI +
3029
- // TrueWeight for PBI * FalseWeight for BI.
3030
- // We assume that total weights of a BranchInst can fit into 32 bits.
3031
- // Therefore, we will not have overflow using 64-bit arithmetic.
3032
- NewWeights.push_back (PredFalseWeight *
3033
- (SuccFalseWeight + SuccTrueWeight) +
3034
- PredTrueWeight * SuccFalseWeight);
3035
- } else {
3036
- // PBI: br i1 %x, TrueDest, BB
3037
- // BI: br i1 %y, TrueDest, UniqueSucc
3038
- // TrueWeight is TrueWeight for PBI * TotalWeight for BI +
3039
- // FalseWeight for PBI * TrueWeight for BI.
3040
- NewWeights.push_back (PredTrueWeight *
3041
- (SuccFalseWeight + SuccTrueWeight) +
3042
- PredFalseWeight * SuccTrueWeight);
3043
- // FalseWeight is FalseWeight for PBI * FalseWeight for BI.
3044
- NewWeights.push_back (PredFalseWeight * SuccFalseWeight);
3045
- }
3046
-
3047
- // Halve the weights if any of them cannot fit in an uint32_t
3048
- FitWeights (NewWeights);
3049
-
3050
- SmallVector<uint32_t , 8 > MDWeights (NewWeights.begin (), NewWeights.end ());
3051
- setBranchWeights (PBI, MDWeights[0 ], MDWeights[1 ]);
3052
-
3053
- // TODO: If BB is reachable from all paths through PredBlock, then we
3054
- // could replace PBI's branch probabilities with BI's.
3055
- } else
3056
- PBI->setMetadata (LLVMContext::MD_prof, nullptr );
3057
-
3058
- PBI->setSuccessor (PBI->getSuccessor (0 ) != BB, UniqueSucc);
3059
-
3060
- if (DTU)
3061
- DTU->applyUpdates ({{DominatorTree::Insert, PredBlock, UniqueSucc},
3062
- {DominatorTree::Delete, PredBlock, BB}});
3063
-
3064
- // If BI was a loop latch, it may have had associated loop metadata.
3065
- // We need to copy it to the new latch, that is, PBI.
3066
- if (MDNode *LoopMD = BI->getMetadata (LLVMContext::MD_loop))
3067
- PBI->setMetadata (LLVMContext::MD_loop, LoopMD);
3068
-
3069
- // Copy any debug value intrinsics into the end of PredBlock.
3070
- for (Instruction &I : *BB) {
3071
- if (isa<DbgInfoIntrinsic>(I)) {
3072
- Instruction *NewI = I.clone ();
3073
- RemapInstruction (NewI, VMap,
3074
- RF_NoModuleLevelChanges | RF_IgnoreMissingLocals);
3075
- NewI->insertBefore (PBI);
3076
- }
3077
- }
3078
-
3079
- return Changed;
3087
+ return PerformBranchToCommonDestFolding (BI, PBI, DTU, MSSAU);
3080
3088
}
3081
3089
return Changed;
3082
3090
}
0 commit comments