@@ -2555,9 +2555,104 @@ bool SwingSchedulerDAG::schedulePipeline(SMSchedule &Schedule) {
25552555 return scheduleFound && Schedule.getMaxStageCount () > 0 ;
25562556}
25572557
2558+ static Register findUniqueOperandDefinedInLoop (const MachineInstr &MI) {
2559+ const MachineRegisterInfo &MRI = MI.getParent ()->getParent ()->getRegInfo ();
2560+ Register Result;
2561+ for (const MachineOperand &Use : MI.all_uses ()) {
2562+ Register Reg = Use.getReg ();
2563+ if (!Reg.isVirtual ())
2564+ return Register ();
2565+ if (MRI.getVRegDef (Reg)->getParent () != MI.getParent ())
2566+ continue ;
2567+ if (Result)
2568+ return Register ();
2569+ Result = Reg;
2570+ }
2571+ return Result;
2572+ }
2573+
2574+ // / When Op is a value that is incremented recursively in a loop and there is a
2575+ // / unique instruction that increments it, returns true and sets Value.
2576+ static bool findLoopIncrementValue (const MachineOperand &Op, int &Value) {
2577+ if (!Op.isReg () || !Op.getReg ().isVirtual ())
2578+ return false ;
2579+
2580+ Register OrgReg = Op.getReg ();
2581+ Register CurReg = OrgReg;
2582+ const MachineBasicBlock *LoopBB = Op.getParent ()->getParent ();
2583+ const MachineRegisterInfo &MRI = LoopBB->getParent ()->getRegInfo ();
2584+
2585+ const TargetInstrInfo *TII =
2586+ LoopBB->getParent ()->getSubtarget ().getInstrInfo ();
2587+ const TargetRegisterInfo *TRI =
2588+ LoopBB->getParent ()->getSubtarget ().getRegisterInfo ();
2589+
2590+ MachineInstr *Phi = nullptr ;
2591+ MachineInstr *Increment = nullptr ;
2592+
2593+ // Traverse definitions until it reaches Op or an instruction that does not
2594+ // satisfy the condition.
2595+ // Acceptable example:
2596+ // bb.0:
2597+ // %0 = PHI %3, %bb.0, ...
2598+ // %2 = ADD %0, Value
2599+ // ... = LOAD %2(Op)
2600+ // %3 = COPY %2
2601+ while (true ) {
2602+ if (!CurReg.isValid () || !CurReg.isVirtual ())
2603+ return false ;
2604+ MachineInstr *Def = MRI.getVRegDef (CurReg);
2605+ if (Def->getParent () != LoopBB)
2606+ return false ;
2607+
2608+ if (Def->isCopy ()) {
2609+ // Ignore copy instructions unless they contain subregisters
2610+ if (Def->getOperand (0 ).getSubReg () || Def->getOperand (1 ).getSubReg ())
2611+ return false ;
2612+ CurReg = Def->getOperand (1 ).getReg ();
2613+ } else if (Def->isPHI ()) {
2614+ // There must be just one Phi
2615+ if (Phi)
2616+ return false ;
2617+ Phi = Def;
2618+ CurReg = getLoopPhiReg (*Def, LoopBB);
2619+ } else if (TII->getIncrementValue (*Def, Value)) {
2620+ // Potentially a unique increment
2621+ if (Increment)
2622+ // Multiple increments exist
2623+ return false ;
2624+
2625+ const MachineOperand *BaseOp;
2626+ int64_t Offset;
2627+ bool OffsetIsScalable;
2628+ if (TII->getMemOperandWithOffset (*Def, BaseOp, Offset, OffsetIsScalable,
2629+ TRI)) {
2630+ // Pre/post increment instruction
2631+ CurReg = BaseOp->getReg ();
2632+ } else {
2633+ // If only one of the operands is defined within the loop, it is assumed
2634+ // to be an incremented value.
2635+ CurReg = findUniqueOperandDefinedInLoop (*Def);
2636+ if (!CurReg.isValid ())
2637+ return false ;
2638+ }
2639+ Increment = Def;
2640+ } else {
2641+ return false ;
2642+ }
2643+ if (CurReg == OrgReg)
2644+ break ;
2645+ }
2646+
2647+ if (!Phi || !Increment)
2648+ return false ;
2649+
2650+ return true ;
2651+ }
2652+
25582653// / Return true if we can compute the amount the instruction changes
25592654// / during each iteration. Set Delta to the amount of the change.
2560- bool SwingSchedulerDAG::computeDelta (MachineInstr &MI, unsigned &Delta) {
2655+ bool SwingSchedulerDAG::computeDelta (const MachineInstr &MI, int &Delta) {
25612656 const TargetRegisterInfo *TRI = MF.getSubtarget ().getRegisterInfo ();
25622657 const MachineOperand *BaseOp;
25632658 int64_t Offset;
@@ -2572,24 +2667,7 @@ bool SwingSchedulerDAG::computeDelta(MachineInstr &MI, unsigned &Delta) {
25722667 if (!BaseOp->isReg ())
25732668 return false ;
25742669
2575- Register BaseReg = BaseOp->getReg ();
2576-
2577- MachineRegisterInfo &MRI = MF.getRegInfo ();
2578- // Check if there is a Phi. If so, get the definition in the loop.
2579- MachineInstr *BaseDef = MRI.getVRegDef (BaseReg);
2580- if (BaseDef && BaseDef->isPHI ()) {
2581- BaseReg = getLoopPhiReg (*BaseDef, MI.getParent ());
2582- BaseDef = MRI.getVRegDef (BaseReg);
2583- }
2584- if (!BaseDef)
2585- return false ;
2586-
2587- int D = 0 ;
2588- if (!TII->getIncrementValue (*BaseDef, D) && D >= 0 )
2589- return false ;
2590-
2591- Delta = D;
2592- return true ;
2670+ return findLoopIncrementValue (*BaseOp, Delta);
25932671}
25942672
25952673// / Check if we can change the instruction to use an offset value from the
@@ -2707,11 +2785,101 @@ MachineInstr *SwingSchedulerDAG::findDefInLoop(Register Reg) {
27072785 return Def;
27082786}
27092787
2788+ // / Return false if there is no overlap between the region accessed by BaseMI in
2789+ // / an iteration and the region accessed by OtherMI in subsequent iterations.
2790+ bool SwingSchedulerDAG::mayOverlapInLaterIter (const MachineInstr *BaseMI,
2791+ const MachineInstr *OtherMI) {
2792+ int DeltaB, DeltaO, Delta;
2793+ if (!computeDelta (*BaseMI, DeltaB) || !computeDelta (*OtherMI, DeltaO) ||
2794+ DeltaB != DeltaO)
2795+ return true ;
2796+ Delta = DeltaB;
2797+
2798+ const MachineOperand *BaseOpB, *BaseOpO;
2799+ int64_t OffsetB, OffsetO;
2800+ bool OffsetBIsScalable, OffsetOIsScalable;
2801+ const TargetRegisterInfo *TRI = MF.getSubtarget ().getRegisterInfo ();
2802+ if (!TII->getMemOperandWithOffset (*BaseMI, BaseOpB, OffsetB,
2803+ OffsetBIsScalable, TRI) ||
2804+ !TII->getMemOperandWithOffset (*OtherMI, BaseOpO, OffsetO,
2805+ OffsetOIsScalable, TRI))
2806+ return true ;
2807+
2808+ if (OffsetBIsScalable || OffsetOIsScalable)
2809+ return true ;
2810+
2811+ if (!BaseOpB->isIdenticalTo (*BaseOpO)) {
2812+ // Pass cases with different base operands but same initial values.
2813+ // Typically for when pre/post increment is used.
2814+
2815+ if (!BaseOpB->isReg () || !BaseOpO->isReg ())
2816+ return true ;
2817+ Register RegB = BaseOpB->getReg (), RegO = BaseOpO->getReg ();
2818+ if (!RegB.isVirtual () || !RegO.isVirtual ())
2819+ return true ;
2820+
2821+ MachineInstr *DefB = MRI.getVRegDef (BaseOpB->getReg ());
2822+ MachineInstr *DefO = MRI.getVRegDef (BaseOpO->getReg ());
2823+ if (!DefB || !DefO || !DefB->isPHI () || !DefO->isPHI ())
2824+ return true ;
2825+
2826+ unsigned InitValB = 0 ;
2827+ unsigned LoopValB = 0 ;
2828+ unsigned InitValO = 0 ;
2829+ unsigned LoopValO = 0 ;
2830+ getPhiRegs (*DefB, BB, InitValB, LoopValB);
2831+ getPhiRegs (*DefO, BB, InitValO, LoopValO);
2832+ MachineInstr *InitDefB = MRI.getVRegDef (InitValB);
2833+ MachineInstr *InitDefO = MRI.getVRegDef (InitValO);
2834+
2835+ if (!InitDefB->isIdenticalTo (*InitDefO))
2836+ return true ;
2837+ }
2838+
2839+ LocationSize AccessSizeB = (*BaseMI->memoperands_begin ())->getSize ();
2840+ LocationSize AccessSizeO = (*OtherMI->memoperands_begin ())->getSize ();
2841+
2842+ // This is the main test, which checks the offset values and the loop
2843+ // increment value to determine if the accesses may be loop carried.
2844+ if (!AccessSizeB.hasValue () || !AccessSizeO.hasValue ())
2845+ return true ;
2846+
2847+ LLVM_DEBUG ({
2848+ dbgs () << " Overlap check:\n " ;
2849+ dbgs () << " BaseMI: " ;
2850+ BaseMI->dump ();
2851+ dbgs () << " Base + " << OffsetB << " + I * " << Delta
2852+ << " , Len: " << AccessSizeB.getValue () << " \n " ;
2853+ dbgs () << " OtherMI: " ;
2854+ OtherMI->dump ();
2855+ dbgs () << " Base + " << OffsetO << " + I * " << Delta
2856+ << " , Len: " << AccessSizeO.getValue () << " \n " ;
2857+ });
2858+
2859+ if (Delta < 0 ) {
2860+ int64_t BaseMinAddr = OffsetB;
2861+ int64_t OhterNextIterMaxAddr = OffsetO + Delta + AccessSizeO.getValue () - 1 ;
2862+ if (BaseMinAddr > OhterNextIterMaxAddr) {
2863+ LLVM_DEBUG (dbgs () << " Result: No overlap\n " );
2864+ return false ;
2865+ }
2866+ } else {
2867+ int64_t BaseMaxAddr = OffsetB + AccessSizeB.getValue () - 1 ;
2868+ int64_t OtherNextIterMinAddr = OffsetO + Delta;
2869+ if (BaseMaxAddr < OtherNextIterMinAddr) {
2870+ LLVM_DEBUG (dbgs () << " Result: No overlap\n " );
2871+ return false ;
2872+ }
2873+ }
2874+ LLVM_DEBUG (dbgs () << " Result: Overlap\n " );
2875+ return true ;
2876+ }
2877+
27102878// / Return true for an order or output dependence that is loop carried
27112879// / potentially. A dependence is loop carried if the destination defines a value
27122880// / that may be used or defined by the source in a subsequent iteration.
27132881bool SwingSchedulerDAG::isLoopCarriedDep (SUnit *Source, const SDep &Dep,
2714- bool isSucc ) {
2882+ bool IsSucc ) {
27152883 if ((Dep.getKind () != SDep::Order && Dep.getKind () != SDep::Output) ||
27162884 Dep.isArtificial () || Dep.getSUnit ()->isBoundaryNode ())
27172885 return false ;
@@ -2724,7 +2892,7 @@ bool SwingSchedulerDAG::isLoopCarriedDep(SUnit *Source, const SDep &Dep,
27242892
27252893 MachineInstr *SI = Source->getInstr ();
27262894 MachineInstr *DI = Dep.getSUnit ()->getInstr ();
2727- if (!isSucc )
2895+ if (!IsSucc )
27282896 std::swap (SI, DI);
27292897 assert (SI != nullptr && DI != nullptr && " Expecting SUnit with an MI." );
27302898
@@ -2740,61 +2908,7 @@ bool SwingSchedulerDAG::isLoopCarriedDep(SUnit *Source, const SDep &Dep,
27402908 // The conservative assumption is that a dependence between memory operations
27412909 // may be loop carried. The following code checks when it can be proved that
27422910 // there is no loop carried dependence.
2743- unsigned DeltaS, DeltaD;
2744- if (!computeDelta (*SI, DeltaS) || !computeDelta (*DI, DeltaD))
2745- return true ;
2746-
2747- const MachineOperand *BaseOpS, *BaseOpD;
2748- int64_t OffsetS, OffsetD;
2749- bool OffsetSIsScalable, OffsetDIsScalable;
2750- const TargetRegisterInfo *TRI = MF.getSubtarget ().getRegisterInfo ();
2751- if (!TII->getMemOperandWithOffset (*SI, BaseOpS, OffsetS, OffsetSIsScalable,
2752- TRI) ||
2753- !TII->getMemOperandWithOffset (*DI, BaseOpD, OffsetD, OffsetDIsScalable,
2754- TRI))
2755- return true ;
2756-
2757- assert (!OffsetSIsScalable && !OffsetDIsScalable &&
2758- " Expected offsets to be byte offsets" );
2759-
2760- MachineInstr *DefS = MRI.getVRegDef (BaseOpS->getReg ());
2761- MachineInstr *DefD = MRI.getVRegDef (BaseOpD->getReg ());
2762- if (!DefS || !DefD || !DefS->isPHI () || !DefD->isPHI ())
2763- return true ;
2764-
2765- unsigned InitValS = 0 ;
2766- unsigned LoopValS = 0 ;
2767- unsigned InitValD = 0 ;
2768- unsigned LoopValD = 0 ;
2769- getPhiRegs (*DefS, BB, InitValS, LoopValS);
2770- getPhiRegs (*DefD, BB, InitValD, LoopValD);
2771- MachineInstr *InitDefS = MRI.getVRegDef (InitValS);
2772- MachineInstr *InitDefD = MRI.getVRegDef (InitValD);
2773-
2774- if (!InitDefS->isIdenticalTo (*InitDefD))
2775- return true ;
2776-
2777- // Check that the base register is incremented by a constant value for each
2778- // iteration.
2779- MachineInstr *LoopDefS = MRI.getVRegDef (LoopValS);
2780- int D = 0 ;
2781- if (!LoopDefS || !TII->getIncrementValue (*LoopDefS, D))
2782- return true ;
2783-
2784- LocationSize AccessSizeS = (*SI->memoperands_begin ())->getSize ();
2785- LocationSize AccessSizeD = (*DI->memoperands_begin ())->getSize ();
2786-
2787- // This is the main test, which checks the offset values and the loop
2788- // increment value to determine if the accesses may be loop carried.
2789- if (!AccessSizeS.hasValue () || !AccessSizeD.hasValue ())
2790- return true ;
2791-
2792- if (DeltaS != DeltaD || DeltaS < AccessSizeS.getValue () ||
2793- DeltaD < AccessSizeD.getValue ())
2794- return true ;
2795-
2796- return (OffsetS + (int64_t )AccessSizeS.getValue () <
2797- OffsetD + (int64_t )AccessSizeD.getValue ());
2911+ return mayOverlapInLaterIter (DI, SI);
27982912}
27992913
28002914void SwingSchedulerDAG::postProcessDAG () {
0 commit comments