Skip to content

Commit efeb8ca

Browse files
committed
[NFC][SimplifyCFG] FoldBranchToCommonDest(): extract the actual transform into helper function
I'm intentionally structuring it this way so that the helper performs only the fold itself and no legality/correctness checks, all of which must be done by the caller. This keeps the fold code more compact and easier to grok.
1 parent b482560 commit efeb8ca

File tree

1 file changed

+184
-176
lines changed

1 file changed

+184
-176
lines changed

llvm/lib/Transforms/Utils/SimplifyCFG.cpp

Lines changed: 184 additions & 176 deletions
Original file line numberDiff line numberDiff line change
@@ -2787,6 +2787,188 @@ CheckIfCondBranchesShareCommonDestination(BranchInst *BI, BranchInst *PBI) {
27872787
return None;
27882788
}
27892789

2790+
/// Perform the actual "fold branch to common destination" transform: merge the
/// conditional branch \p BI (terminator of its parent block BB) into the
/// predecessor's conditional branch \p PBI.
///
/// This helper only performs the fold. It does no legality/correctness checks;
/// all of those must already have been done by the caller.
///
/// The non-debug, non-branch instructions of BB are cloned in front of \p PBI,
/// the two branch conditions are combined with a binary operator, and \p PBI
/// is retargeted from BB to the successor of \p BI that the two branches do
/// not share. Branch weights, loop metadata and debug intrinsics are carried
/// over to the predecessor block.
///
/// \param BI    The conditional branch terminating the block being folded.
/// \param PBI   The conditional branch in the predecessor block.
/// \param DTU   Optional dominator tree updater; may be null.
/// \param MSSAU Forwarded to AddPredecessorToBlock().
/// \returns Always true.
static bool PerformBranchToCommonDestFolding(BranchInst *BI, BranchInst *PBI,
2791+
DomTreeUpdater *DTU,
2792+
MemorySSAUpdater *MSSAU) {
2793+
BasicBlock *BB = BI->getParent();
2794+
BasicBlock *PredBlock = PBI->getParent();
2795+
2796+
// Recompute how the two branches share a common destination: which binary
// operator combines the conditions, and whether PBI's condition must be
// inverted first. The result is dereferenced without a check, so the caller
// must already have established that the branches do share a destination.
2797+
Instruction::BinaryOps Opc;
2798+
bool InvertPredCond;
2799+
std::tie(Opc, InvertPredCond) =
2800+
*CheckIfCondBranchesShareCommonDestination(BI, PBI);
2801+
2802+
LLVM_DEBUG(dbgs() << "FOLDING BRANCH TO COMMON DEST:\n" << *PBI << *BB);
2803+
2804+
IRBuilder<> Builder(PBI);
2805+
// The builder is used to create instructions to eliminate the branch in BB.
2806+
// If BB's terminator has !annotation metadata, add it to the new
2807+
// instructions.
2808+
Builder.CollectMetadataToCopy(BB->getTerminator(),
2809+
{LLVMContext::MD_annotation});
2810+
2811+
// If we need to invert the condition in the pred block to match, do so now.
2812+
if (InvertPredCond) {
2813+
Value *NewCond = PBI->getCondition();
2814+
if (NewCond->hasOneUse() && isa<CmpInst>(NewCond)) {
// A compare used only by this branch can be inverted in place.
2815+
CmpInst *CI = cast<CmpInst>(NewCond);
2816+
CI->setPredicate(CI->getInversePredicate());
2817+
} else {
// Otherwise materialize an explicit 'not' in front of PBI.
2818+
NewCond =
2819+
Builder.CreateNot(NewCond, PBI->getCondition()->getName() + ".not");
2820+
}
2821+
2822+
PBI->setCondition(NewCond);
2823+
PBI->swapSuccessors();
2824+
}
2825+
2826+
// UniqueSucc is the successor of BI in the same slot (true/false) in which
// PBI branches to BB; it is the block PBI is retargeted to below.
BasicBlock *UniqueSucc =
2827+
PBI->getSuccessor(0) == BB ? BI->getSuccessor(0) : BI->getSuccessor(1);
2828+
2829+
// Before cloning instructions, notify the successor basic block that it
2830+
// is about to have a new predecessor. This will update PHI nodes,
2831+
// which will allow us to update live-out uses of bonus instructions.
2832+
AddPredecessorToBlock(UniqueSucc, PredBlock, BB, MSSAU);
2833+
2834+
// If we have bonus instructions, clone them into the predecessor block.
2835+
// Note that there may be multiple predecessor blocks, so we cannot move
2836+
// bonus instructions to a predecessor block.
2837+
ValueToValueMapTy VMap; // maps original values to cloned values
2838+
for (Instruction &BonusInst : *BB) {
2839+
if (isa<DbgInfoIntrinsic>(BonusInst) || isa<BranchInst>(BonusInst))
2840+
continue;
2841+
2842+
Instruction *NewBonusInst = BonusInst.clone();
2843+
2844+
if (PBI->getDebugLoc() != NewBonusInst->getDebugLoc()) {
2845+
// Unless the instruction has the same !dbg location as the original
2846+
// branch, drop it. When we fold the bonus instructions we want to make
2847+
// sure we reset their debug locations in order to avoid stepping on
2848+
// dead code caused by folding dead branches.
2849+
NewBonusInst->setDebugLoc(DebugLoc());
2850+
}
2851+
2852+
// Rewrite the clone's operands to refer to earlier clones, then record the
// clone so that later instructions (and the branch condition) can find it.
RemapInstruction(NewBonusInst, VMap,
2853+
RF_NoModuleLevelChanges | RF_IgnoreMissingLocals);
2854+
VMap[&BonusInst] = NewBonusInst;
2855+
2856+
// If we moved a load, we cannot any longer claim any knowledge about
2857+
// its potential value. The previous information might have been valid
2858+
// only given the branch precondition.
2859+
// For an analogous reason, we must also drop all the metadata whose
2860+
// semantics we don't understand. We *can* preserve !annotation, because
2861+
// it is tied to the instruction itself, not the value or position.
2862+
NewBonusInst->dropUnknownNonDebugMetadata(LLVMContext::MD_annotation);
2863+
2864+
PredBlock->getInstList().insert(PBI->getIterator(), NewBonusInst);
2865+
NewBonusInst->takeName(&BonusInst);
2866+
BonusInst.setName(BonusInst.getName() + ".old");
2867+
// Redirect only those uses of the original instruction that must now see
// the clone; the predicate below returns true for a use to be rewritten.
BonusInst.replaceUsesWithIf(
2868+
NewBonusInst, [BB, BI, UniqueSucc, PredBlock](Use &U) {
2869+
auto *User = cast<Instruction>(U.getUser());
2870+
// Ignore non-external uses of bonus instructions.
2871+
if (User->getParent() == BB) {
2872+
assert(!isa<PHINode>(User) &&
2873+
"Non-external users are never PHI instructions.");
2874+
return false;
2875+
}
2876+
if (User->getParent() == PredBlock) {
2877+
// The "external" use is in the block into which we just cloned the
2878+
// bonus instruction. This means two things: 1. we are in an
2879+
// unreachable block 2. the instruction is self-referencing.
2880+
// So let's just rewrite it...
2881+
return true;
2882+
}
2883+
// BI is not referenced anywhere else in this lambda; presumably this
// cast only silences an unused-capture warning.
(void)BI;
2884+
assert(isa<PHINode>(User) && "All external users must be PHI's.");
2885+
auto *PN = cast<PHINode>(User);
2886+
assert(is_contained(successors(BB), User->getParent()) &&
2887+
"All external users must be in successors of BB.");
2888+
assert((PN->getIncomingBlock(U) == BB ||
2889+
PN->getIncomingBlock(U) == PredBlock) &&
2890+
"The incoming block for that incoming value external use "
2891+
"must be either the original block with bonus instructions, "
2892+
"or the new predecessor block.");
2893+
// UniqueSucc is the block for which we change its predecessors,
2894+
// so it is the only block in which we'll need to update PHI nodes.
2895+
if (User->getParent() != UniqueSucc)
2896+
return false;
2897+
// Update the incoming value for the new predecessor.
2898+
return PN->getIncomingBlock(U) == PredBlock;
2899+
});
2900+
}
2901+
2902+
// Now that the Cond was cloned into the predecessor basic block,
2903+
// or/and the two conditions together.
2904+
Instruction *NewCond = cast<Instruction>(Builder.CreateBinOp(
2905+
Opc, PBI->getCondition(), VMap[BI->getCondition()], "or.cond"));
2906+
PBI->setCondition(NewCond);
2907+
2908+
// Combine the branch weights of PBI and BI when both are available;
// otherwise drop PBI's !prof metadata.
uint64_t PredTrueWeight, PredFalseWeight, SuccTrueWeight, SuccFalseWeight;
2909+
if (extractPredSuccWeights(PBI, BI, PredTrueWeight, PredFalseWeight,
2910+
SuccTrueWeight, SuccFalseWeight)) {
2911+
SmallVector<uint64_t, 8> NewWeights;
2912+
2913+
if (PBI->getSuccessor(0) == BB) {
2914+
// PBI: br i1 %x, BB, FalseDest
2915+
// BI: br i1 %y, UniqueSucc, FalseDest
2916+
// TrueWeight is TrueWeight for PBI * TrueWeight for BI.
2917+
NewWeights.push_back(PredTrueWeight * SuccTrueWeight);
2918+
// FalseWeight is FalseWeight for PBI * TotalWeight for BI +
2919+
// TrueWeight for PBI * FalseWeight for BI.
2920+
// We assume that total weights of a BranchInst can fit into 32 bits.
2921+
// Therefore, we will not have overflow using 64-bit arithmetic.
2922+
NewWeights.push_back(PredFalseWeight *
2923+
(SuccFalseWeight + SuccTrueWeight) +
2924+
PredTrueWeight * SuccFalseWeight);
2925+
} else {
2926+
// PBI: br i1 %x, TrueDest, BB
2927+
// BI: br i1 %y, TrueDest, UniqueSucc
2928+
// TrueWeight is TrueWeight for PBI * TotalWeight for BI +
2929+
// FalseWeight for PBI * TrueWeight for BI.
2930+
NewWeights.push_back(PredTrueWeight * (SuccFalseWeight + SuccTrueWeight) +
2931+
PredFalseWeight * SuccTrueWeight);
2932+
// FalseWeight is FalseWeight for PBI * FalseWeight for BI.
2933+
NewWeights.push_back(PredFalseWeight * SuccFalseWeight);
2934+
}
2935+
2936+
// Halve the weights if any of them cannot fit in an uint32_t
2937+
FitWeights(NewWeights);
2938+
2939+
SmallVector<uint32_t, 8> MDWeights(NewWeights.begin(), NewWeights.end());
2940+
setBranchWeights(PBI, MDWeights[0], MDWeights[1]);
2941+
2942+
// TODO: If BB is reachable from all paths through PredBlock, then we
2943+
// could replace PBI's branch probabilities with BI's.
2944+
} else
2945+
PBI->setMetadata(LLVMContext::MD_prof, nullptr);
2946+
2947+
// Retarget whichever successor slot of PBI pointed at BB to UniqueSucc.
PBI->setSuccessor(PBI->getSuccessor(0) != BB, UniqueSucc);
2948+
2949+
// Mirror the CFG edge change (PredBlock->BB replaced by
// PredBlock->UniqueSucc) in the dominator tree, if one is being maintained.
if (DTU)
2950+
DTU->applyUpdates({{DominatorTree::Insert, PredBlock, UniqueSucc},
2951+
{DominatorTree::Delete, PredBlock, BB}});
2952+
2953+
// If BI was a loop latch, it may have had associated loop metadata.
2954+
// We need to copy it to the new latch, that is, PBI.
2955+
if (MDNode *LoopMD = BI->getMetadata(LLVMContext::MD_loop))
2956+
PBI->setMetadata(LLVMContext::MD_loop, LoopMD);
2957+
2958+
// Copy any debug value intrinsics into the end of PredBlock.
2959+
for (Instruction &I : *BB) {
2960+
if (isa<DbgInfoIntrinsic>(I)) {
2961+
Instruction *NewI = I.clone();
2962+
RemapInstruction(NewI, VMap,
2963+
RF_NoModuleLevelChanges | RF_IgnoreMissingLocals);
2964+
NewI->insertBefore(PBI);
2965+
}
2966+
}
2967+
2968+
// The fold is unconditional once we get here: bump the statistic and report
// that a change was made.
++NumFoldBranchToCommonDest;
2969+
return true;
2970+
}
2971+
27902972
/// If this basic block is simple enough, and if a predecessor branches to us
27912973
/// and one of our successors, fold the block into the predecessor and use
27922974
/// logical operations to pick the right destination.
@@ -2805,11 +2987,6 @@ bool llvm::FoldBranchToCommonDest(BranchInst *BI, DomTreeUpdater *DTU,
28052987

28062988
bool Changed = false;
28072989

2808-
auto _ = make_scope_exit([&]() {
2809-
if (Changed)
2810-
++NumFoldBranchToCommonDest;
2811-
});
2812-
28132990
TargetTransformInfo::TargetCostKind CostKind =
28142991
BB->getParent()->hasMinSize() ? TargetTransformInfo::TCK_CodeSize
28152992
: TargetTransformInfo::TCK_SizeAndLatency;
@@ -2872,9 +3049,7 @@ bool llvm::FoldBranchToCommonDest(BranchInst *BI, DomTreeUpdater *DTU,
28723049
return Changed;
28733050

28743051
// Finally, don't infinitely unroll conditional loops.
2875-
BasicBlock *TrueDest = BI->getSuccessor(0);
2876-
BasicBlock *FalseDest = BI->getSuccessor(1);
2877-
if (TrueDest == BB || FalseDest == BB)
3052+
if (is_contained(successors(BB), BB))
28783053
return Changed;
28793054

28803055
for (pred_iterator PI = pred_begin(BB), E = pred_end(BB); PI != E; ++PI) {
@@ -2909,174 +3084,7 @@ bool llvm::FoldBranchToCommonDest(BranchInst *BI, DomTreeUpdater *DTU,
29093084
continue;
29103085
}
29113086

2912-
LLVM_DEBUG(dbgs() << "FOLDING BRANCH TO COMMON DEST:\n" << *PBI << *BB);
2913-
Changed = true;
2914-
2915-
IRBuilder<> Builder(PBI);
2916-
// The builder is used to create instructions to eliminate the branch in BB.
2917-
// If BB's terminator has !annotation metadata, add it to the new
2918-
// instructions.
2919-
Builder.CollectMetadataToCopy(BB->getTerminator(),
2920-
{LLVMContext::MD_annotation});
2921-
2922-
// If we need to invert the condition in the pred block to match, do so now.
2923-
if (InvertPredCond) {
2924-
Value *NewCond = PBI->getCondition();
2925-
if (NewCond->hasOneUse() && isa<CmpInst>(NewCond)) {
2926-
CmpInst *CI = cast<CmpInst>(NewCond);
2927-
CI->setPredicate(CI->getInversePredicate());
2928-
} else {
2929-
NewCond =
2930-
Builder.CreateNot(NewCond, PBI->getCondition()->getName() + ".not");
2931-
}
2932-
2933-
PBI->setCondition(NewCond);
2934-
PBI->swapSuccessors();
2935-
}
2936-
2937-
BasicBlock *UniqueSucc = PBI->getSuccessor(0) == BB ? TrueDest : FalseDest;
2938-
2939-
// Before cloning instructions, notify the successor basic block that it
2940-
// is about to have a new predecessor. This will update PHI nodes,
2941-
// which will allow us to update live-out uses of bonus instructions.
2942-
AddPredecessorToBlock(UniqueSucc, PredBlock, BB, MSSAU);
2943-
2944-
// If we have bonus instructions, clone them into the predecessor block.
2945-
// Note that there may be multiple predecessor blocks, so we cannot move
2946-
// bonus instructions to a predecessor block.
2947-
ValueToValueMapTy VMap; // maps original values to cloned values
2948-
for (Instruction &BonusInst : *BB) {
2949-
if (isa<DbgInfoIntrinsic>(BonusInst) || isa<BranchInst>(BonusInst))
2950-
continue;
2951-
2952-
Instruction *NewBonusInst = BonusInst.clone();
2953-
2954-
if (PBI->getDebugLoc() != NewBonusInst->getDebugLoc()) {
2955-
// Unless the instruction has the same !dbg location as the original
2956-
// branch, drop it. When we fold the bonus instructions we want to make
2957-
// sure we reset their debug locations in order to avoid stepping on
2958-
// dead code caused by folding dead branches.
2959-
NewBonusInst->setDebugLoc(DebugLoc());
2960-
}
2961-
2962-
RemapInstruction(NewBonusInst, VMap,
2963-
RF_NoModuleLevelChanges | RF_IgnoreMissingLocals);
2964-
VMap[&BonusInst] = NewBonusInst;
2965-
2966-
// If we moved a load, we cannot any longer claim any knowledge about
2967-
// its potential value. The previous information might have been valid
2968-
// only given the branch precondition.
2969-
// For an analogous reason, we must also drop all the metadata whose
2970-
// semantics we don't understand. We *can* preserve !annotation, because
2971-
// it is tied to the instruction itself, not the value or position.
2972-
NewBonusInst->dropUnknownNonDebugMetadata(LLVMContext::MD_annotation);
2973-
2974-
PredBlock->getInstList().insert(PBI->getIterator(), NewBonusInst);
2975-
NewBonusInst->takeName(&BonusInst);
2976-
BonusInst.setName(BonusInst.getName() + ".old");
2977-
BonusInst.replaceUsesWithIf(
2978-
NewBonusInst, [BB, BI, UniqueSucc, PredBlock](Use &U) {
2979-
auto *User = cast<Instruction>(U.getUser());
2980-
// Ignore non-external uses of bonus instructions.
2981-
if (User->getParent() == BB) {
2982-
assert(!isa<PHINode>(User) &&
2983-
"Non-external users are never PHI instructions.");
2984-
return false;
2985-
}
2986-
if (User->getParent() == PredBlock) {
2987-
// The "exteral" use is in the block into which we just cloned the
2988-
// bonus instruction. This means two things: 1. we are in an
2989-
// unreachable block 2. the instruction is self-referencing.
2990-
// So let's just rewrite it...
2991-
return true;
2992-
}
2993-
(void)BI;
2994-
assert(isa<PHINode>(User) && "All external users must be PHI's.");
2995-
auto *PN = cast<PHINode>(User);
2996-
assert(is_contained(successors(BB), User->getParent()) &&
2997-
"All external users must be in successors of BB.");
2998-
assert((PN->getIncomingBlock(U) == BB ||
2999-
PN->getIncomingBlock(U) == PredBlock) &&
3000-
"The incoming block for that incoming value external use "
3001-
"must be either the original block with bonus instructions, "
3002-
"or the new predecessor block.");
3003-
// UniqueSucc is the block for which we change it's predecessors,
3004-
// so it is the only block in which we'll need to update PHI nodes.
3005-
if (User->getParent() != UniqueSucc)
3006-
return false;
3007-
// Update the incoming value for the new predecessor.
3008-
return PN->getIncomingBlock(U) == PredBlock;
3009-
});
3010-
}
3011-
3012-
// Now that the Cond was cloned into the predecessor basic block,
3013-
// or/and the two conditions together.
3014-
Instruction *NewCond = cast<Instruction>(Builder.CreateBinOp(
3015-
Opc, PBI->getCondition(), VMap[BI->getCondition()], "or.cond"));
3016-
PBI->setCondition(NewCond);
3017-
3018-
uint64_t PredTrueWeight, PredFalseWeight, SuccTrueWeight, SuccFalseWeight;
3019-
if (extractPredSuccWeights(PBI, BI, PredTrueWeight, PredFalseWeight,
3020-
SuccTrueWeight, SuccFalseWeight)) {
3021-
SmallVector<uint64_t, 8> NewWeights;
3022-
3023-
if (PBI->getSuccessor(0) == BB) {
3024-
// PBI: br i1 %x, BB, FalseDest
3025-
// BI: br i1 %y, UniqueSucc, FalseDest
3026-
// TrueWeight is TrueWeight for PBI * TrueWeight for BI.
3027-
NewWeights.push_back(PredTrueWeight * SuccTrueWeight);
3028-
// FalseWeight is FalseWeight for PBI * TotalWeight for BI +
3029-
// TrueWeight for PBI * FalseWeight for BI.
3030-
// We assume that total weights of a BranchInst can fit into 32 bits.
3031-
// Therefore, we will not have overflow using 64-bit arithmetic.
3032-
NewWeights.push_back(PredFalseWeight *
3033-
(SuccFalseWeight + SuccTrueWeight) +
3034-
PredTrueWeight * SuccFalseWeight);
3035-
} else {
3036-
// PBI: br i1 %x, TrueDest, BB
3037-
// BI: br i1 %y, TrueDest, UniqueSucc
3038-
// TrueWeight is TrueWeight for PBI * TotalWeight for BI +
3039-
// FalseWeight for PBI * TrueWeight for BI.
3040-
NewWeights.push_back(PredTrueWeight *
3041-
(SuccFalseWeight + SuccTrueWeight) +
3042-
PredFalseWeight * SuccTrueWeight);
3043-
// FalseWeight is FalseWeight for PBI * FalseWeight for BI.
3044-
NewWeights.push_back(PredFalseWeight * SuccFalseWeight);
3045-
}
3046-
3047-
// Halve the weights if any of them cannot fit in an uint32_t
3048-
FitWeights(NewWeights);
3049-
3050-
SmallVector<uint32_t, 8> MDWeights(NewWeights.begin(), NewWeights.end());
3051-
setBranchWeights(PBI, MDWeights[0], MDWeights[1]);
3052-
3053-
// TODO: If BB is reachable from all paths through PredBlock, then we
3054-
// could replace PBI's branch probabilities with BI's.
3055-
} else
3056-
PBI->setMetadata(LLVMContext::MD_prof, nullptr);
3057-
3058-
PBI->setSuccessor(PBI->getSuccessor(0) != BB, UniqueSucc);
3059-
3060-
if (DTU)
3061-
DTU->applyUpdates({{DominatorTree::Insert, PredBlock, UniqueSucc},
3062-
{DominatorTree::Delete, PredBlock, BB}});
3063-
3064-
// If BI was a loop latch, it may have had associated loop metadata.
3065-
// We need to copy it to the new latch, that is, PBI.
3066-
if (MDNode *LoopMD = BI->getMetadata(LLVMContext::MD_loop))
3067-
PBI->setMetadata(LLVMContext::MD_loop, LoopMD);
3068-
3069-
// Copy any debug value intrinsics into the end of PredBlock.
3070-
for (Instruction &I : *BB) {
3071-
if (isa<DbgInfoIntrinsic>(I)) {
3072-
Instruction *NewI = I.clone();
3073-
RemapInstruction(NewI, VMap,
3074-
RF_NoModuleLevelChanges | RF_IgnoreMissingLocals);
3075-
NewI->insertBefore(PBI);
3076-
}
3077-
}
3078-
3079-
return Changed;
3087+
return PerformBranchToCommonDestFolding(BI, PBI, DTU, MSSAU);
30803088
}
30813089
return Changed;
30823090
}

0 commit comments

Comments
 (0)