1919#include " llvm/Analysis/RegionInfo.h"
2020#include " llvm/Analysis/RegionIterator.h"
2121#include " llvm/Analysis/RegionPass.h"
22+ #include " llvm/Analysis/TargetTransformInfo.h"
2223#include " llvm/Analysis/UniformityAnalysis.h"
2324#include " llvm/IR/BasicBlock.h"
2425#include " llvm/IR/CFG.h"
@@ -128,6 +129,7 @@ struct PredInfo {
128129using BBPredicates = DenseMap<BasicBlock *, PredInfo>;
129130using PredMap = DenseMap<BasicBlock *, BBPredicates>;
130131using BB2BBMap = DenseMap<BasicBlock *, BasicBlock *>;
132+ using Val2BBMap = DenseMap<Value *, BasicBlock *>;
131133
132134// A traits type that is intended to be used in graph algorithms. The graph
133135// traits starts at an entry node, and traverses the RegionNodes that are in
@@ -279,7 +281,7 @@ class StructurizeCFG {
279281 ConstantInt *BoolTrue;
280282 ConstantInt *BoolFalse;
281283 Value *BoolPoison;
282-
284+ const TargetTransformInfo *TTI;
283285 Function *Func;
284286 Region *ParentRegion;
285287
@@ -301,8 +303,12 @@ class StructurizeCFG {
301303 PredMap LoopPreds;
302304 BranchVector LoopConds;
303305
306+ Val2BBMap HoistedValues;
307+
304308 RegionNode *PrevNode;
305309
310+ void hoistZeroCostElseBlockPhiValues (BasicBlock *ElseBB, BasicBlock *ThenBB);
311+
306312 void orderNodes ();
307313
308314 void analyzeLoops (RegionNode *N);
@@ -332,6 +338,8 @@ class StructurizeCFG {
332338
333339 void simplifyAffectedPhis ();
334340
341+ void simplifyHoistedPhis ();
342+
335343 DebugLoc killTerminator (BasicBlock *BB);
336344
337345 void changeExit (RegionNode *Node, BasicBlock *NewExit,
@@ -359,7 +367,7 @@ class StructurizeCFG {
359367
360368public:
361369 void init (Region *R);
362- bool run (Region *R, DominatorTree *DT);
370+ bool run (Region *R, DominatorTree *DT, const TargetTransformInfo *TTI );
363371 bool makeUniformRegion (Region *R, UniformityInfo &UA);
364372};
365373
@@ -385,16 +393,21 @@ class StructurizeCFGLegacyPass : public RegionPass {
385393 if (SCFG.makeUniformRegion (R, UA))
386394 return false ;
387395 }
396+ Function *F = R->getEntry ()->getParent ();
397+ const TargetTransformInfo *TTI =
398+ &getAnalysis<TargetTransformInfoWrapperPass>().getTTI (*F);
388399 DominatorTree *DT = &getAnalysis<DominatorTreeWrapperPass>().getDomTree ();
389- return SCFG.run (R, DT);
400+ return SCFG.run (R, DT, TTI );
390401 }
391402
392403 StringRef getPassName () const override { return " Structurize control flow" ; }
393404
394405 void getAnalysisUsage (AnalysisUsage &AU) const override {
395406 if (SkipUniformRegions)
396407 AU.addRequired <UniformityInfoWrapperPass>();
408+ AU.addRequired <TargetTransformInfoWrapperPass>();
397409 AU.addRequired <DominatorTreeWrapperPass>();
410+ AU.addRequired <TargetTransformInfoWrapperPass>();
398411
399412 AU.addPreserved <DominatorTreeWrapperPass>();
400413 RegionPass::getAnalysisUsage (AU);
@@ -403,6 +416,34 @@ class StructurizeCFGLegacyPass : public RegionPass {
403416
404417} // end anonymous namespace
405418
419+ // / Checks whether an instruction is zero cost instruction and checks if the
420+ // / operands are from different BB. If so, this instruction can be coalesced
421+ // / if its hoisted to predecessor block. So, this returns true.
422+ static bool isHoistableInstruction (Instruction *I, BasicBlock *BB,
423+ const TargetTransformInfo *TTI) {
424+ if (I->getParent () != BB)
425+ return false ;
426+
427+ // If the instruction is not a zero cost instruction, return false.
428+ auto Cost = TTI->getInstructionCost (I, TargetTransformInfo::TCK_Latency);
429+ InstructionCost::CostType CostVal =
430+ Cost.isValid ()
431+ ? Cost.getValue ()
432+ : (InstructionCost::CostType)TargetTransformInfo::TCC_Expensive;
433+ if (CostVal != 0 )
434+ return false ;
435+
436+ // Check if any operands are instructions defined in the same block.
437+ for (auto &Op : I->operands ()) {
438+ if (auto *OpI = dyn_cast<Instruction>(Op)) {
439+ if (OpI->getParent () == BB)
440+ return false ;
441+ }
442+ }
443+
444+ return true ;
445+ }
446+
406447char StructurizeCFGLegacyPass::ID = 0 ;
407448
408449INITIALIZE_PASS_BEGIN (StructurizeCFGLegacyPass, " structurizecfg" ,
@@ -413,6 +454,39 @@ INITIALIZE_PASS_DEPENDENCY(RegionInfoPass)
413454INITIALIZE_PASS_END(StructurizeCFGLegacyPass, " structurizecfg" ,
414455 " Structurize the CFG" , false , false )
415456
457+ // / Structurization can introduce unnecessary VGPR copies due to register
458+ // / coalescing interference. For example, if the Else block has a zero-cost
459+ // / instruction and the Then block modifies the VGPR value, only one value is
460+ // / live at a time in merge block before structurization. After structurization,
461+ // / the coalescer may incorrectly treat the Then value as live in the Else block
462+ // / (via the path Then → Flow → Else), leading to unnecessary VGPR copies.
463+ // /
464+ // / This function examines phi nodes whose incoming values are zero-cost
465+ // / instructions in the Else block. It identifies such values that can be safely
466+ // / hoisted and moves them to the nearest common dominator of Then and Else
467+ // / blocks. A follow-up function after setting PhiNodes assigns the hoisted
468+ // / value to poison phi nodes along the if→flow edge, aiding register coalescing
469+ // / and minimizing unnecessary live ranges.
470+ void StructurizeCFG::hoistZeroCostElseBlockPhiValues(BasicBlock *ElseBB,
471+ BasicBlock *ThenBB) {
472+
473+ BasicBlock *ElseSucc = ElseBB->getSingleSuccessor ();
474+ BasicBlock *CommonDominator = DT->findNearestCommonDominator (ElseBB, ThenBB);
475+
476+ if (!ElseSucc || !CommonDominator)
477+ return ;
478+ Instruction *Term = CommonDominator->getTerminator ();
479+ for (PHINode &Phi : ElseSucc->phis ()) {
480+ Value *ElseVal = Phi.getIncomingValueForBlock (ElseBB);
481+ auto *Inst = dyn_cast<Instruction>(ElseVal);
482+ if (!Inst || !isHoistableInstruction (Inst, ElseBB, TTI))
483+ continue ;
484+ Inst->removeFromParent ();
485+ Inst->insertInto (CommonDominator, Term->getIterator ());
486+ HoistedValues[Inst] = CommonDominator;
487+ }
488+ }
489+
416490// / Build up the general order of nodes, by performing a topological sort of the
417491// / parent region's nodes, while ensuring that there is no outer cycle node
418492// / between any two inner cycle nodes.
@@ -535,7 +609,7 @@ void StructurizeCFG::gatherPredicates(RegionNode *N) {
535609 BasicBlock *Other = Term->getSuccessor (!i);
536610 if (Visited.count (Other) && !Loops.count (Other) &&
537611 !Pred.count (Other) && !Pred.count (P)) {
538-
612+ hoistZeroCostElseBlockPhiValues (Succ, Other);
539613 Pred[Other] = {BoolFalse, std::nullopt };
540614 Pred[P] = {BoolTrue, std::nullopt };
541615 continue ;
@@ -891,6 +965,44 @@ void StructurizeCFG::setPhiValues() {
891965 AffectedPhis.append (InsertedPhis.begin (), InsertedPhis.end ());
892966}
893967
968+ // / Updates PHI nodes after hoisted zero cost instructions by replacing poison
969+ // / entries on Flow nodes with the appropriate hoisted values
970+ void StructurizeCFG::simplifyHoistedPhis () {
971+ for (WeakVH VH : AffectedPhis) {
972+ PHINode *Phi = dyn_cast_or_null<PHINode>(VH);
973+ if (!Phi || Phi->getNumIncomingValues () != 2 )
974+ continue ;
975+
976+ for (int i = 0 ; i < 2 ; i++) {
977+ Value *V = Phi->getIncomingValue (i);
978+ auto BBIt = HoistedValues.find (V);
979+
980+ if (BBIt == HoistedValues.end ())
981+ continue ;
982+
983+ Value *OtherV = Phi->getIncomingValue (!i);
984+ PHINode *OtherPhi = dyn_cast<PHINode>(OtherV);
985+ if (!OtherPhi)
986+ continue ;
987+
988+ int PoisonValBBIdx = -1 ;
989+ for (size_t i = 0 ; i < OtherPhi->getNumIncomingValues (); i++) {
990+ if (!isa<PoisonValue>(OtherPhi->getIncomingValue (i)))
991+ continue ;
992+ PoisonValBBIdx = i;
993+ break ;
994+ }
995+ if (PoisonValBBIdx == -1 ||
996+ !DT->dominates (BBIt->second ,
997+ OtherPhi->getIncomingBlock (PoisonValBBIdx)))
998+ continue ;
999+
1000+ OtherPhi->setIncomingValue (PoisonValBBIdx, V);
1001+ Phi->setIncomingValue (i, OtherV);
1002+ }
1003+ }
1004+ }
1005+
8941006void StructurizeCFG::simplifyAffectedPhis () {
8951007 bool Changed;
8961008 do {
@@ -1283,12 +1395,13 @@ bool StructurizeCFG::makeUniformRegion(Region *R, UniformityInfo &UA) {
12831395}
12841396
12851397// / Run the transformation for each region found
1286- bool StructurizeCFG::run (Region *R, DominatorTree *DT) {
1398+ bool StructurizeCFG::run (Region *R, DominatorTree *DT,
1399+ const TargetTransformInfo *TTI) {
12871400 if (R->isTopLevelRegion ())
12881401 return false ;
12891402
12901403 this ->DT = DT;
1291-
1404+ this -> TTI = TTI;
12921405 Func = R->getEntry ()->getParent ();
12931406 assert (hasOnlySimpleTerminator (*Func) && " Unsupported block terminator." );
12941407
@@ -1300,6 +1413,7 @@ bool StructurizeCFG::run(Region *R, DominatorTree *DT) {
13001413 insertConditions (false );
13011414 insertConditions (true );
13021415 setPhiValues ();
1416+ simplifyHoistedPhis ();
13031417 simplifyConditions ();
13041418 simplifyAffectedPhis ();
13051419 rebuildSSA ();
@@ -1349,7 +1463,7 @@ PreservedAnalyses StructurizeCFGPass::run(Function &F,
13491463 bool Changed = false ;
13501464 DominatorTree *DT = &AM.getResult <DominatorTreeAnalysis>(F);
13511465 auto &RI = AM.getResult <RegionInfoAnalysis>(F);
1352-
1466+ TargetTransformInfo *TTI = &AM. getResult <TargetIRAnalysis>(F);
13531467 UniformityInfo *UI = nullptr ;
13541468 if (SkipUniformRegions)
13551469 UI = &AM.getResult <UniformityInfoAnalysis>(F);
@@ -1368,7 +1482,7 @@ PreservedAnalyses StructurizeCFGPass::run(Function &F,
13681482 continue ;
13691483 }
13701484
1371- Changed |= SCFG.run (R, DT);
1485+ Changed |= SCFG.run (R, DT, TTI );
13721486 }
13731487 if (!Changed)
13741488 return PreservedAnalyses::all ();
0 commit comments