@@ -2521,9 +2521,104 @@ bool SwingSchedulerDAG::schedulePipeline(SMSchedule &Schedule) {
25212521 return scheduleFound && Schedule.getMaxStageCount () > 0 ;
25222522}
25232523
2524+ static Register findUniqueOperandDefinedInLoop (const MachineInstr &MI) {
2525+ const MachineRegisterInfo &MRI = MI.getParent ()->getParent ()->getRegInfo ();
2526+ Register Result;
2527+ for (const MachineOperand &Use : MI.all_uses ()) {
2528+ Register Reg = Use.getReg ();
2529+ if (!Reg.isVirtual ())
2530+ return Register ();
2531+ if (MRI.getVRegDef (Reg)->getParent () != MI.getParent ())
2532+ continue ;
2533+ if (Result)
2534+ return Register ();
2535+ Result = Reg;
2536+ }
2537+ return Result;
2538+ }
2539+
2540+ // / When Op is a value that is incremented recursively in a loop and there is a
2541+ // / unique instruction that increments it, returns true and sets Value.
2542+ static bool findLoopIncrementValue (const MachineOperand &Op, int &Value) {
2543+ if (!Op.isReg () || !Op.getReg ().isVirtual ())
2544+ return false ;
2545+
2546+ Register OrgReg = Op.getReg ();
2547+ Register CurReg = OrgReg;
2548+ const MachineBasicBlock *LoopBB = Op.getParent ()->getParent ();
2549+ const MachineRegisterInfo &MRI = LoopBB->getParent ()->getRegInfo ();
2550+
2551+ const TargetInstrInfo *TII =
2552+ LoopBB->getParent ()->getSubtarget ().getInstrInfo ();
2553+ const TargetRegisterInfo *TRI =
2554+ LoopBB->getParent ()->getSubtarget ().getRegisterInfo ();
2555+
2556+ MachineInstr *Phi = nullptr ;
2557+ MachineInstr *Increment = nullptr ;
2558+
2559+ // Traverse definitions until it reaches Op or an instruction that does not
2560+ // satisfy the condition.
2561+ // Acceptable example:
2562+ // bb.0:
2563+ // %0 = PHI %3, %bb.0, ...
2564+ // %2 = ADD %0, Value
2565+ // ... = LOAD %2(Op)
2566+ // %3 = COPY %2
2567+ while (true ) {
2568+ if (!CurReg.isValid () || !CurReg.isVirtual ())
2569+ return false ;
2570+ MachineInstr *Def = MRI.getVRegDef (CurReg);
2571+ if (Def->getParent () != LoopBB)
2572+ return false ;
2573+
2574+ if (Def->isCopy ()) {
2575+ // Ignore copy instructions unless they contain subregisters
2576+ if (Def->getOperand (0 ).getSubReg () || Def->getOperand (1 ).getSubReg ())
2577+ return false ;
2578+ CurReg = Def->getOperand (1 ).getReg ();
2579+ } else if (Def->isPHI ()) {
2580+ // There must be just one Phi
2581+ if (Phi)
2582+ return false ;
2583+ Phi = Def;
2584+ CurReg = getLoopPhiReg (*Def, LoopBB);
2585+ } else if (TII->getIncrementValue (*Def, Value)) {
2586+ // Potentially a unique increment
2587+ if (Increment)
2588+ // Multiple increments exist
2589+ return false ;
2590+
2591+ const MachineOperand *BaseOp;
2592+ int64_t Offset;
2593+ bool OffsetIsScalable;
2594+ if (TII->getMemOperandWithOffset (*Def, BaseOp, Offset, OffsetIsScalable,
2595+ TRI)) {
2596+ // Pre/post increment instruction
2597+ CurReg = BaseOp->getReg ();
2598+ } else {
2599+ // If only one of the operands is defined within the loop, it is assumed
2600+ // to be an incremented value.
2601+ CurReg = findUniqueOperandDefinedInLoop (*Def);
2602+ if (!CurReg.isValid ())
2603+ return false ;
2604+ }
2605+ Increment = Def;
2606+ } else {
2607+ return false ;
2608+ }
2609+ if (CurReg == OrgReg)
2610+ break ;
2611+ }
2612+
2613+ if (!Phi || !Increment)
2614+ return false ;
2615+
2616+ return true ;
2617+ }
2618+
25242619// / Return true if we can compute the amount the instruction changes
25252620// / during each iteration. Set Delta to the amount of the change.
2526- bool SwingSchedulerDAG::computeDelta (MachineInstr &MI, unsigned &Delta) const {
2621+ bool SwingSchedulerDAG::computeDelta (const MachineInstr &MI, int &Delta) const {
25272622 const TargetRegisterInfo *TRI = MF.getSubtarget ().getRegisterInfo ();
25282623 const MachineOperand *BaseOp;
25292624 int64_t Offset;
@@ -2538,24 +2633,7 @@ bool SwingSchedulerDAG::computeDelta(MachineInstr &MI, unsigned &Delta) const {
25382633 if (!BaseOp->isReg ())
25392634 return false ;
25402635
2541- Register BaseReg = BaseOp->getReg ();
2542-
2543- MachineRegisterInfo &MRI = MF.getRegInfo ();
2544- // Check if there is a Phi. If so, get the definition in the loop.
2545- MachineInstr *BaseDef = MRI.getVRegDef (BaseReg);
2546- if (BaseDef && BaseDef->isPHI ()) {
2547- BaseReg = getLoopPhiReg (*BaseDef, MI.getParent ());
2548- BaseDef = MRI.getVRegDef (BaseReg);
2549- }
2550- if (!BaseDef)
2551- return false ;
2552-
2553- int D = 0 ;
2554- if (!TII->getIncrementValue (*BaseDef, D) && D >= 0 )
2555- return false ;
2556-
2557- Delta = D;
2558- return true ;
2636+ return findLoopIncrementValue (*BaseOp, Delta);
25592637}
25602638
25612639// / Check if we can change the instruction to use an offset value from the
@@ -2673,6 +2751,100 @@ MachineInstr *SwingSchedulerDAG::findDefInLoop(Register Reg) {
26732751 return Def;
26742752}
26752753
2754+ // / Return false if there is no overlap between the region accessed by BaseMI in
2755+ // / an iteration and the region accessed by OtherMI in subsequent iterations.
2756+ bool SwingSchedulerDAG::mayOverlapInLaterIter (
2757+ const MachineInstr *BaseMI, const MachineInstr *OtherMI) const {
2758+ int DeltaB, DeltaO, Delta;
2759+ if (!computeDelta (*BaseMI, DeltaB) || !computeDelta (*OtherMI, DeltaO) ||
2760+ DeltaB != DeltaO)
2761+ return true ;
2762+ Delta = DeltaB;
2763+
2764+ const MachineOperand *BaseOpB, *BaseOpO;
2765+ int64_t OffsetB, OffsetO;
2766+ bool OffsetBIsScalable, OffsetOIsScalable;
2767+ const TargetRegisterInfo *TRI = MF.getSubtarget ().getRegisterInfo ();
2768+ if (!TII->getMemOperandWithOffset (*BaseMI, BaseOpB, OffsetB,
2769+ OffsetBIsScalable, TRI) ||
2770+ !TII->getMemOperandWithOffset (*OtherMI, BaseOpO, OffsetO,
2771+ OffsetOIsScalable, TRI))
2772+ return true ;
2773+
2774+ if (OffsetBIsScalable || OffsetOIsScalable)
2775+ return true ;
2776+
2777+ if (!BaseOpB->isIdenticalTo (*BaseOpO)) {
2778+ // Pass cases with different base operands but same initial values.
2779+ // Typically for when pre/post increment is used.
2780+
2781+ if (!BaseOpB->isReg () || !BaseOpO->isReg ())
2782+ return true ;
2783+ Register RegB = BaseOpB->getReg (), RegO = BaseOpO->getReg ();
2784+ if (!RegB.isVirtual () || !RegO.isVirtual ())
2785+ return true ;
2786+
2787+ MachineInstr *DefB = MRI.getVRegDef (BaseOpB->getReg ());
2788+ MachineInstr *DefO = MRI.getVRegDef (BaseOpO->getReg ());
2789+ if (!DefB || !DefO || !DefB->isPHI () || !DefO->isPHI ())
2790+ return true ;
2791+
2792+ unsigned InitValB = 0 ;
2793+ unsigned LoopValB = 0 ;
2794+ unsigned InitValO = 0 ;
2795+ unsigned LoopValO = 0 ;
2796+ getPhiRegs (*DefB, BB, InitValB, LoopValB);
2797+ getPhiRegs (*DefO, BB, InitValO, LoopValO);
2798+ MachineInstr *InitDefB = MRI.getVRegDef (InitValB);
2799+ MachineInstr *InitDefO = MRI.getVRegDef (InitValO);
2800+
2801+ if (!InitDefB->isIdenticalTo (*InitDefO))
2802+ return true ;
2803+ }
2804+
2805+ LocationSize AccessSizeB = (*BaseMI->memoperands_begin ())->getSize ();
2806+ LocationSize AccessSizeO = (*OtherMI->memoperands_begin ())->getSize ();
2807+
2808+ // This is the main test, which checks the offset values and the loop
2809+ // increment value to determine if the accesses may be loop carried.
2810+ if (!AccessSizeB.hasValue () || !AccessSizeO.hasValue ())
2811+ return true ;
2812+
2813+ LLVM_DEBUG ({
2814+ dbgs () << " Overlap check:\n " ;
2815+ dbgs () << " BaseMI: " ;
2816+ BaseMI->dump ();
2817+ dbgs () << " Base + " << OffsetB << " + I * " << Delta
2818+ << " , Len: " << AccessSizeB.getValue () << " \n " ;
2819+ dbgs () << " OtherMI: " ;
2820+ OtherMI->dump ();
2821+ dbgs () << " Base + " << OffsetO << " + I * " << Delta
2822+ << " , Len: " << AccessSizeO.getValue () << " \n " ;
2823+ });
2824+
2825+ // Excessive overlap may be detected in strided patterns.
2826+ // For example, the memory addresses of the store and the load in
2827+ // for (i=0; i<n; i+=2) a[i+1] = a[i];
2828+ // are assumed to overlap.
2829+ if (Delta < 0 ) {
2830+ int64_t BaseMinAddr = OffsetB;
2831+ int64_t OhterNextIterMaxAddr = OffsetO + Delta + AccessSizeO.getValue () - 1 ;
2832+ if (BaseMinAddr > OhterNextIterMaxAddr) {
2833+ LLVM_DEBUG (dbgs () << " Result: No overlap\n " );
2834+ return false ;
2835+ }
2836+ } else {
2837+ int64_t BaseMaxAddr = OffsetB + AccessSizeB.getValue () - 1 ;
2838+ int64_t OtherNextIterMinAddr = OffsetO + Delta;
2839+ if (BaseMaxAddr < OtherNextIterMinAddr) {
2840+ LLVM_DEBUG (dbgs () << " Result: No overlap\n " );
2841+ return false ;
2842+ }
2843+ }
2844+ LLVM_DEBUG (dbgs () << " Result: Overlap\n " );
2845+ return true ;
2846+ }
2847+
26762848// / Return true for an order or output dependence that is loop carried
26772849// / potentially. A dependence is loop carried if the destination defines a value
26782850// / that may be used or defined by the source in a subsequent iteration.
@@ -2704,61 +2876,7 @@ bool SwingSchedulerDAG::isLoopCarriedDep(
27042876 // The conservative assumption is that a dependence between memory operations
27052877 // may be loop carried. The following code checks when it can be proved that
27062878 // there is no loop carried dependence.
2707- unsigned DeltaS, DeltaD;
2708- if (!computeDelta (*SI, DeltaS) || !computeDelta (*DI, DeltaD))
2709- return true ;
2710-
2711- const MachineOperand *BaseOpS, *BaseOpD;
2712- int64_t OffsetS, OffsetD;
2713- bool OffsetSIsScalable, OffsetDIsScalable;
2714- const TargetRegisterInfo *TRI = MF.getSubtarget ().getRegisterInfo ();
2715- if (!TII->getMemOperandWithOffset (*SI, BaseOpS, OffsetS, OffsetSIsScalable,
2716- TRI) ||
2717- !TII->getMemOperandWithOffset (*DI, BaseOpD, OffsetD, OffsetDIsScalable,
2718- TRI))
2719- return true ;
2720-
2721- assert (!OffsetSIsScalable && !OffsetDIsScalable &&
2722- " Expected offsets to be byte offsets" );
2723-
2724- MachineInstr *DefS = MRI.getVRegDef (BaseOpS->getReg ());
2725- MachineInstr *DefD = MRI.getVRegDef (BaseOpD->getReg ());
2726- if (!DefS || !DefD || !DefS->isPHI () || !DefD->isPHI ())
2727- return true ;
2728-
2729- unsigned InitValS = 0 ;
2730- unsigned LoopValS = 0 ;
2731- unsigned InitValD = 0 ;
2732- unsigned LoopValD = 0 ;
2733- getPhiRegs (*DefS, BB, InitValS, LoopValS);
2734- getPhiRegs (*DefD, BB, InitValD, LoopValD);
2735- MachineInstr *InitDefS = MRI.getVRegDef (InitValS);
2736- MachineInstr *InitDefD = MRI.getVRegDef (InitValD);
2737-
2738- if (!InitDefS->isIdenticalTo (*InitDefD))
2739- return true ;
2740-
2741- // Check that the base register is incremented by a constant value for each
2742- // iteration.
2743- MachineInstr *LoopDefS = MRI.getVRegDef (LoopValS);
2744- int D = 0 ;
2745- if (!LoopDefS || !TII->getIncrementValue (*LoopDefS, D))
2746- return true ;
2747-
2748- LocationSize AccessSizeS = (*SI->memoperands_begin ())->getSize ();
2749- LocationSize AccessSizeD = (*DI->memoperands_begin ())->getSize ();
2750-
2751- // This is the main test, which checks the offset values and the loop
2752- // increment value to determine if the accesses may be loop carried.
2753- if (!AccessSizeS.hasValue () || !AccessSizeD.hasValue ())
2754- return true ;
2755-
2756- if (DeltaS != DeltaD || DeltaS < AccessSizeS.getValue () ||
2757- DeltaD < AccessSizeD.getValue ())
2758- return true ;
2759-
2760- return (OffsetS + (int64_t )AccessSizeS.getValue () <
2761- OffsetD + (int64_t )AccessSizeD.getValue ());
2879+ return mayOverlapInLaterIter (DI, SI);
27622880}
27632881
27642882void SwingSchedulerDAG::postProcessDAG () {
0 commit comments