1919
2020#include " EraVM.h"
2121#include " llvm/ADT/STLExtras.h"
22+ #include " llvm/Analysis/LoopInfo.h"
23+ #include " llvm/IR/Dominators.h"
2224#include " llvm/IR/IRBuilder.h"
2325#include " llvm/IR/InstrTypes.h"
2426#include " llvm/IR/PatternMatch.h"
27+ #include " llvm/InitializePasses.h"
2528
2629using namespace llvm ;
2730using namespace llvm ::PatternMatch;
@@ -30,6 +33,14 @@ using namespace llvm::PatternMatch;
// Human-readable pass description used when registering the pass.
#define ERAVM_POST_CODEGEN_PREPARE_NAME                                        \
  "EraVM optimizations after CodeGenPrepare pass"
3235
36+ static cl::opt<bool > EnableSplitLoopPHILiveRanges (
37+ " eravm-enable-split-loop-phi-live-ranges" , cl::Hidden, cl::init(true ),
38+ cl::desc(" Enable splitting live ranges of PHI nodes in loops" ));
39+
40+ static cl::opt<unsigned > NumOfPHIUsesToSplitLiveRanges (
41+ " eravm-num-of-phi-uses-to-split-live-ranges" , cl::Hidden, cl::init(20 ),
42+ cl::desc(" Number of uses of PHI node to consider splitting live ranges" ));
43+
3344namespace {
3445struct EraVMPostCodegenPrepare : public FunctionPass {
3546public:
@@ -44,6 +55,9 @@ struct EraVMPostCodegenPrepare : public FunctionPass {
4455 }
4556
4657 void getAnalysisUsage (AnalysisUsage &AU) const override {
58+ AU.addRequired <DominatorTreeWrapperPass>();
59+ AU.addRequired <LoopInfoWrapperPass>();
60+ AU.addPreserved <LoopInfoWrapperPass>();
4761 FunctionPass::getAnalysisUsage (AU);
4862 }
4963};
@@ -303,7 +317,229 @@ static bool rearrangeOverflowHandlingBranches(Function &F) {
303317 return Changed;
304318}
305319
306- static bool runImpl (Function &F) {
320+ struct InstSplitInfo {
321+ using InstInfo = std::pair<Instruction *, APInt>;
322+ // Vector of instructions with their original constant operand.
323+ SmallVector<InstInfo, 4 > Insts;
324+ // Index of the last instruction that will be used to split the live range
325+ // in the dominatee blocks.
326+ uint64_t LastSplitIdx = UINT64_MAX;
327+
328+ // Add instruction and update the last split index if needed.
329+ void addInst (Instruction *I) {
330+ assert (isa<ConstantInt>(I->getOperand (1 )) && " Expected constant operand" );
331+
332+ // Update the index if this instruction is closer to the end of the block.
333+ if (LastSplitIdx == UINT64_MAX ||
334+ !I->comesBefore (Insts[LastSplitIdx].first ))
335+ LastSplitIdx = Insts.size ();
336+
337+ Insts.push_back ({I, cast<ConstantInt>(I->getOperand (1 ))->getValue ()});
338+ }
339+
340+ // Get the last instruction that will be used to split the live range.
341+ InstInfo &getLastSplitInst () {
342+ assert (LastSplitIdx < Insts.size () && " Expected valid index" );
343+ return Insts[LastSplitIdx];
344+ }
345+ };
346+
347+ // Update operands of instruction in InstInfo with the instruction in
348+ // DomInstInfo. Basically, we are doing the following transformation:
349+ // %Dominator = add %In, DomImm
350+ // %I = add %In, Imm
351+ // to
352+ // %Dominator = add %In, DomImm
353+ // %I = add %Dominator, Imm - DomImm
354+ void updateInstOperands (InstSplitInfo::InstInfo &InstInfo,
355+ InstSplitInfo::InstInfo &DomInstInfo,
356+ const DominatorTree &DT) {
357+ auto [Inst, Imm] = InstInfo;
358+ auto [DomInst, DomImm] = DomInstInfo;
359+ assert (DT.dominates (DomInst, Inst) && " Expected dominator instruction" );
360+
361+ Inst->setOperand (0 , DomInst);
362+ Inst->setOperand (1 , ConstantInt::get (Inst->getType (), Imm - DomImm));
363+
364+ // TODO: Relax this check, since we don't need to drop poison
365+ // flags in all cases.
366+ Inst->dropPoisonGeneratingFlags ();
367+ }
368+
369+ // This function splits PHI nodes live ranges, if users are add instructions
370+ // with a constant operand. This is useful for loops with large switch
371+ // statements where PHI nodes are used frequently, and we want to keep these
372+ // variables in a single register.
373+ // In case regalloc is not able to keep these variables in a single register,
374+ // we will get something like this in all cases of the switch where variable
375+ // is used:
376+ // preheader:
377+ // %r1 = def
378+ // header:
379+ // jump @JTI
380+ // ...
381+ // bb1:
382+ // %r2 = add %r1, 1
383+ // bcc bb2
384+ // bb2:
385+ // %r1 = copy %r2 <- regalloc is not able to keep variable in the same reg
386+ // b latch
387+ // ...
388+ // latch:
389+ // bcc header
390+ //
391+ // Ideally, we would like to have something like this:
392+ // preheader:
393+ // %r1 = def
394+ // header:
395+ // jump @JTI
396+ // ...
397+ // bb1:
398+ // %r1 = add %r1, 1 <- regalloc managed to keep variable in the same reg
399+ // bcc bb2
400+ // bb2:
401+ // <- no need for copy instruction
402+ // b latch
403+ // ...
404+ // latch:
405+ // bcc header
406+ //
407+ // To help regalloc to try to preserve frequently used PHI nodes in a single
408+ // register we are finding add instructions with constant operands that are
409+ // users of the PHI, and changing first operand of the add instruction to the
410+ // nearest dominating add instruction while updating the constant operand. This
411+ // way, regalloc will have a better chance to keep the variable in the same
412+ // register, since we changed the intervals of the variable.
413+ // For example, we are transforming this:
414+ // header:
415+ // %phi = phi
416+ // ...
417+ // bb1:
418+ // %add1 = add %phi, 64
419+ // ...
420+ // %add2 = add %phi, -64
421+ // bcc bb2
422+ // bb2:
423+ // %add3 = add %phi, -32
424+ // ...
425+ // %add4 = add %phi, -96
426+ // ...
427+ // b latch
428+ // latch:
429+ // bcc header
430+ //
431+ // To this (where add3 and add4 are updated with the nearest dominating add,
432+ // which is add2):
433+ // header:
434+ // %phi = phi
435+ // ...
436+ // bb1:
437+ // %add1 = add %phi, 64
438+ // ...
439+ // %add2 = add %phi, -64
440+ // bcc bb2
441+ // bb2:
442+ // %add3 = add %add2, 32
443+ // ...
444+ // %add4 = add %add2, -32
445+ // ...
446+ // b latch
447+ // latch:
448+ // bcc header
449+ //
450+ // In order to do so, we are doing the following steps:
451+ // 1. Find all users of the PHI node in the loop that are add instructions
452+ // with constant operands, and updating the index of the instruction that
453+ // is closer to the end of the block. This instruction will be used to
454+ // split ranges in the dominatee blocks.
455+ // 2. For each block, we are finding the nearest dominator block from which
456+ // we can split the live range.
457+ // 3. Update the instructions in the block with the nearest dominator by
458+ // changing the first operands to the dominator instruction and updating
459+ // the constant operands.
460+ static bool splitPHILiveRange (PHINode &Phi, const LoopInfo &LI, const Loop &L,
461+ const DominatorTree &DT) {
462+ assert (Phi.getParent () == L.getHeader () &&
463+ " Expected PHI node in a loop header" );
464+ DomTreeNode *LoopHeaderNode = DT.getNode (L.getHeader ());
465+ if (!LoopHeaderNode)
466+ return false ;
467+
468+ DenseMap<BasicBlock *, InstSplitInfo> Splits;
469+ for (auto *U : Phi.users ()) {
470+ // Only collect add instructions with constant operands.
471+ auto *UI = cast<Instruction>(U);
472+ if (!UI || UI->getOpcode () != Instruction::Add ||
473+ !isa<ConstantInt>(UI->getOperand (1 )) ||
474+ LI.getLoopFor (UI->getParent ()) != &L)
475+ continue ;
476+ Splits[UI->getParent ()].addInst (UI);
477+ }
478+
479+ // If there are no at least two blocks, we can't split the live range,
480+ // since we need dominator and dominatee blocks to do so.
481+ if (Splits.size () < 2 )
482+ return false ;
483+
484+ // Split ranges across blocks. This is done by finding the nearest
485+ // dominator block, from which we can split the live range.
486+ bool Changed = false ;
487+ for (auto &[BB, Infos] : Splits) {
488+ DomTreeNode *Node = DT.getNode (BB);
489+ if (!Node)
490+ continue ;
491+
492+ // Find the nearest dominator, from which we can split the live range.
493+ InstSplitInfo::InstInfo *NearestDominator = nullptr ;
494+ while ((Node = Node->getIDom ())) {
495+ auto I = Splits.find (Node->getBlock ());
496+ if (I != Splits.end ()) {
497+ // We found the nearest dominator block, so take the last
498+ // instruction from it.
499+ NearestDominator = &I->second .getLastSplitInst ();
500+ break ;
501+ }
502+
503+ // Bail out if we reached the start of the loop.
504+ if (Node == LoopHeaderNode)
505+ break ;
506+ }
507+
508+ // If we didn't find any dominator, skip this BB.
509+ if (!NearestDominator)
510+ continue ;
511+
512+ // Update instructions in the block with the nearest dominator.
513+ for (auto &Info : Infos.Insts )
514+ updateInstOperands (Info, *NearestDominator, DT);
515+
516+ // TODO: Relax this check, since we don't need to drop poison
517+ // flags in all cases.
518+ NearestDominator->first ->dropPoisonGeneratingFlags ();
519+ Changed = true ;
520+ }
521+ return Changed;
522+ }
523+
524+ // This optimization tries to split live ranges of PHI nodes in a loop,
525+ // with a large number of users.
526+ static bool splitLoopPHILiveRanges (Function &F, LoopInfo &LI,
527+ DominatorTree &DT) {
528+ if (!EnableSplitLoopPHILiveRanges)
529+ return false ;
530+
531+ bool Changed = false ;
532+ for (auto *L : LI) {
533+ for (auto &Phi : L->getHeader ()->phis ()) {
534+ if (Phi.getNumUses () <= NumOfPHIUsesToSplitLiveRanges)
535+ continue ;
536+ Changed |= splitPHILiveRange (Phi, LI, *L, DT);
537+ }
538+ }
539+ return Changed;
540+ }
541+
542+ static bool runImpl (Function &F, LoopInfo &LI, DominatorTree &DT) {
307543 bool Changed = false ;
308544 for (auto &BB : F) {
309545 for (auto &I : llvm::make_early_inc_range (BB)) {
@@ -320,28 +556,38 @@ static bool runImpl(Function &F) {
320556 }
321557 }
322558
559+ Changed |= splitLoopPHILiveRanges (F, LI, DT);
323560 Changed |= rearrangeOverflowHandlingBranches (F);
324561 return Changed;
325562}
326563
327564bool EraVMPostCodegenPrepare::runOnFunction (Function &F) {
328565 if (skipFunction (F))
329566 return false ;
330- return runImpl (F);
567+
568+ auto &LI = getAnalysis<LoopInfoWrapperPass>().getLoopInfo ();
569+ auto &DT = getAnalysis<DominatorTreeWrapperPass>().getDomTree ();
570+ return runImpl (F, LI, DT);
331571}
332572
333573char EraVMPostCodegenPrepare::ID = 0 ;
334574
335- INITIALIZE_PASS (EraVMPostCodegenPrepare, DEBUG_TYPE,
336- ERAVM_POST_CODEGEN_PREPARE_NAME, false , false )
575+ INITIALIZE_PASS_BEGIN (EraVMPostCodegenPrepare, DEBUG_TYPE,
576+ ERAVM_POST_CODEGEN_PREPARE_NAME, false , false )
577+ INITIALIZE_PASS_DEPENDENCY(DominatorTreeWrapperPass)
578+ INITIALIZE_PASS_DEPENDENCY(LoopInfoWrapperPass)
579+ INITIALIZE_PASS_END(EraVMPostCodegenPrepare, DEBUG_TYPE,
580+ ERAVM_POST_CODEGEN_PREPARE_NAME, false , false )
337581
338582FunctionPass *llvm::createEraVMPostCodegenPreparePass() {
339583 return new EraVMPostCodegenPrepare ();
340584}
341585
342586PreservedAnalyses
343587EraVMPostCodegenPreparePass::run (Function &F, FunctionAnalysisManager &AM) {
344- if (runImpl (F))
588+ auto &LI = AM.getResult <LoopAnalysis>(F);
589+ auto &DT = AM.getResult <DominatorTreeAnalysis>(F);
590+ if (runImpl (F, LI, DT))
345591 return PreservedAnalyses::none ();
346592 return PreservedAnalyses::all ();
347593}
0 commit comments