1818#include " llvm/ADT/MapVector.h"
1919#include " llvm/CodeGen/MachineInstr.h"
2020#include " llvm/CodeGen/MachineScheduler.h"
21+ #include < cstdint>
22+ #include < limits>
2123
2224namespace llvm {
2325
@@ -297,6 +299,8 @@ class GCNScheduleDAGMILive final : public ScheduleDAGMILive {
297299
298300 std::unique_ptr<GCNSchedStage> createSchedStage (GCNSchedStageID SchedStageID);
299301
302+ void deleteMI (unsigned RegionIdx, MachineInstr *MI);
303+
300304public:
301305 GCNScheduleDAGMILive (MachineSchedContext *C,
302306 std::unique_ptr<MachineSchedStrategy> S);
@@ -432,70 +436,209 @@ class ClusteredLowOccStage : public GCNSchedStage {
432436};
433437
434438// / Attempts to reduce function spilling or, if there is no spilling, to
435- // / increase function occupancy by one with respect to ArchVGPR usage by sinking
436- // / rematerializable instructions to their use. When the stage
437- // / estimates reducing spilling or increasing occupancy is possible, as few
438- // / instructions as possible are rematerialized to reduce potential negative
439+ // / increase function occupancy by one with respect to register usage by sinking
440+ // / rematerializable instructions to their use. When the stage estimates that
441+ // / reducing spilling or increasing occupancy is possible, it tries to
442+ // / rematerialize as few registers as possible to reduce potential negative
439443// / effects on function latency.
440444class PreRARematStage : public GCNSchedStage {
441445private:
442- // / Useful information about a rematerializable instruction.
443- struct RematInstruction {
444- // / Single use of the rematerializable instruction's defined register,
445- // / located in a different block.
446+ // / Groups information about a rematerializable register.
447+ struct RematReg {
448+ // / Single MI defining the rematerializable register.
449+ MachineInstr *DefMI;
450+ // / Single user of the rematerializable register.
446451 MachineInstr *UseMI;
447- // / Rematerialized version of \p DefMI, set in
448- // / PreRARematStage::rematerialize. Used for reverting rematerializations.
449- MachineInstr *RematMI;
450- // / Set of regions in which the rematerializable instruction's defined
451- // / register is a live-in.
452- SmallDenseSet<unsigned , 4 > LiveInRegions;
453-
454- RematInstruction (MachineInstr *UseMI) : UseMI(UseMI) {}
452+ // / Using region.
453+ unsigned UseRegion;
454+ // / Regions in which the register is live-in/live-out/live anywhere.
455+ BitVector LiveIn, LiveOut, Live;
456+ // / The rematerializable register's lane bitmask.
457+ LaneBitmask Mask;
458+ // / Frequency of region defining/using the register. 0 when unknown.
459+ unsigned DefFrequency, UseFrequency;
460+
461+ RematReg (MachineInstr *DefMI, MachineInstr *UseMI,
462+ GCNScheduleDAGMILive &DAG,
463+ const DenseMap<MachineInstr *, unsigned > &MIRegion,
464+ ArrayRef<uint64_t > RegionFreq);
465+
466+ // / Returns whether the set of regions in which the register is live
467+ // / intersects the \p Target regions.
468+ bool intersectWithTarget (BitVector Target) const {
469+ Target &= Live; // Target is a by-value copy, so the caller's vector is untouched.
470+ return Target.any ();
471+ }
472+
473+ // / Returns whether it is always beneficial to rematerialize this register.
474+ bool isAlwaysBeneficial () const {
475+ // When the using region is executed a single time, we know
476+ // rematerializing will be beneficial whatever the defining region's
477+ // frequency.
478+ if (UseFrequency == 1 )
479+ return true ;
480+ // When the defining or using frequency is unknown (0), we err on the
481+ // conservative side and do not consider the rematerialization always
482+ // beneficial.
483+ if (!DefFrequency || !UseFrequency)
484+ return false ;
485+ return UseFrequency <= DefFrequency; // Using region runs no more often than the defining one.
486+ }
487+
488+ // / Determines whether rematerializing the register is guaranteed to reduce
489+ // / pressure in region \p I: the register is live-in and live-out of \p I, and \p I is not the using region.
490+ bool isBeneficialRegion (unsigned I) const {
491+ assert (I < Live.size () && " region index out of range" );
492+ return LiveIn[I] && LiveOut[I] && I != UseRegion;
493+ }
494+
495+ // / Determines whether rematerializing the register may reduce pressure in
496+ // / region \p I without being guaranteed to: the register is live in \p I but isBeneficialRegion(I) is false.
497+ bool isMaybeBeneficialRegion (unsigned I) const {
498+ assert (I < Live.size () && " region index out of range" );
499+ return Live[I] && !isBeneficialRegion (I);
500+ }
501+
502+ // / Updates internal structures following a MI rematerialization. Part of
503+ // / the stage instead of the DAG because it makes assumptions that are
504+ // / specific to the rematerialization process.
505+ MachineInstr *insertMI (unsigned RegionIdx,
506+ MachineBasicBlock::iterator InsertPos,
507+ GCNScheduleDAGMILive &DAG) const ;
508+
509+ #if !defined(NDEBUG) || defined(LLVM_ENABLE_DUMP)
510+ void print (const DenseMap<MachineInstr *, unsigned > &MIRegion) const ;
511+ #endif
512+ };
513+
514+ // / A scored rematerializable register. Higher scores indicate more beneficial
515+ // / rematerializations. Non-positive scores indicate the rematerialization is
516+ // / not helpful to reduce RP in target regions.
517+ struct ScoredRemat {
518+ // / The rematerializable register under consideration.
519+ const RematReg *Remat;
520+
521+ // / This only initializes state-independent characteristics of \p Remat, not
522+ // / the actual score.
523+ ScoredRemat (const RematReg *Remat, const GCNSubtarget &ST,
524+ const TargetInstrInfo &TII);
525+
526+ // / Updates the rematerialization's score w.r.t. the current \p RPTargets.
527+ // / \p RegionFreq indicates the frequency of each region.
528+ void update (const BitVector &TargetRegions, ArrayRef<GCNRPTarget> RPTargets,
529+ ArrayRef<uint64_t > RegionFreq, bool ReduceSpill);
530+
531+ int getScore () const { return Score; } // Current value of the packed score.
532+
533+ bool operator <(const ScoredRemat &O) const { return Score < O.Score ; } // Orders by score only.
534+ bool operator ==(const ScoredRemat &O) const { return Score == O.Score ; } // Equal scores compare equal, whatever the underlying register.
535+
536+ private:
537+ // / Estimated save/restore latency costs for spilling a register to stack.
538+ // / FIXME: These numbers are very arbitrary. Need a good rationale for them,
539+ // / which I don't know where to get from.
540+ static constexpr int SaveCost = 100 , RestoreCost = 100 ;
541+ // / Per-region contribution weights to RP score depending on whether RP is
542+ // / guaranteed or only likely to be reduced in the region. Only their
543+ // / relative values w.r.t. one another matter.
544+ static constexpr int WeightRP = 10 , WeightRPMaybe = 5 ;
545+
546+ // / Latency gain induced by rematerializing the instruction. Does not
547+ // / include estimated spilling cost of *not* rematerializing (save/restore
548+ // / to/from stack).
549+ std::optional<int > InstrLatencyGain = std::nullopt ;
550+
551+ using ScoreTy = int32_t ;
552+ // / Overall rematerialization score. Scoring components are mapped to bit
553+ // / ranges in the overall score.
554+ // /
555+ // / [31:1] : estimated RP reduction score
556+ // / [0] : known latency gain
557+ ScoreTy Score;
558+
559+ void resetScore () { Score = 0 ; } // Clears every bit of the score (RP score and latency bit).
560+
561+ void setUselessRemat () { Score = std::numeric_limits<ScoreTy>::min (); } // Lowest representable score, so operator< ranks it below any other value.
562+
563+ void setKnownLatencyGain () { Score |= 1 ; } // Sets bit 0 of the score.
564+
565+ void setRPScore (unsigned RPScore) { // ORs \p RPScore into the bits above bit 0; previously set bits are not cleared here.
566+ Score |= static_cast <ScoreTy>(RPScore) << 1 ; // NOTE(review): an RPScore >= 2^30 would shift into the sign bit of the 32-bit ScoreTy -- confirm callers bound it.
567+ }
455568 };
456569
457- // / Maps all MIs to their parent region. MI terminators are considered to be
458- // / outside the region they delimitate, and as such are not stored in the map.
570+ // / Maps all MIs (except lone terminators, which are not part of any region)
571+ // / to their parent region. Non-lone terminators are considered part of the
572+ // / region they delimitate.
459573 DenseMap<MachineInstr *, unsigned > MIRegion;
460574 // / Parent MBB to each region, in region order.
461575 SmallVector<MachineBasicBlock *> RegionBB;
462- // / Collects instructions to rematerialize.
463- MapVector<MachineInstr *, RematInstruction> Rematerializations;
464- // / Collects regions whose live-ins or register pressure will change due to
465- // / rematerializations.
466- DenseMap<unsigned , GCNRegPressure> ImpactedRegions;
467- // / In case we need to rollback rematerializations, save lane masks for all
468- // / rematerialized registers in all regions in which they are live-ins.
469- DenseMap<std::pair<unsigned , Register>, LaneBitmask> RegMasks;
470- // / After successful stage initialization, indicates which regions should be
471- // / rescheduled.
472- BitVector RescheduleRegions;
473- // / The target occupancy the stage is trying to achieve. Empty when the
576+
577+ // / Register pressure targets for all regions.
578+ SmallVector<GCNRPTarget> RPTargets;
579+ // / Regions which are above the stage's RP target.
580+ BitVector TargetRegions;
581+ // / The target occupancy the stage is trying to achieve. Empty when the
474582 // / objective is spilling reduction.
475583 std::optional<unsigned > TargetOcc;
476584 // / Achieved occupancy *only* through rematerializations (pre-rescheduling).
477- // / Smaller than or equal to the target occupancy.
585+ // / Smaller than or equal to the target occupancy, when it is defined .
478586 unsigned AchievedOcc;
479587
480- // / Returns whether remat can reduce spilling or increase function occupancy
481- // / by 1 through rematerialization. If it can do one, collects instructions in
482- // / PreRARematStage::Rematerializations and sets the target occupancy in
483- // / PreRARematStage::TargetOccupancy.
484- bool canIncreaseOccupancyOrReduceSpill ();
588+ // / List of rematerializable registers.
589+ SmallVector<RematReg, 16 > RematRegs;
590+
591+ using RollbackReg = std::pair<MachineInstr *, const RematReg *>;
592+ // / List of rematerializations to rollback if rematerialization does not end
593+ // / up being beneficial. Each element pairs the MI created during
594+ // / rematerialization to the original rematerializable register.
595+ SmallVector<RollbackReg> Rollbackable;
596+
597+ // / After successful stage initialization, indicates which regions should be
598+ // / rescheduled.
599+ BitVector RescheduleRegions;
600+
601+ // / Determines the stage's objective (increasing occupancy or reducing
602+ // / spilling, set in \ref TargetOcc). Defines \ref RPTargets in all regions to
603+ // / achieve that objective and mark those that don't achieve it in \ref
604+ // / TargetRegions.
605+ void setObjective ();
606+
607+ // / Unsets target regions in \p Regions whose RP target has been reached.
608+ void unsetSatisifedRPTargets (const BitVector &Regions);
609+
610+ // / Fully recomputes RP from the DAG in \p Regions. Among those regions, sets
611+ // / again all \ref TargetRegions that were optimistically marked as satisfied
612+ // / but are actually not, and returns whether there were any such regions.
613+ bool updateAndVerifyRPTargets (const BitVector &Regions);
614+
615+ // / Collects all rematerializable registers and appends them to \ref
616+ // / RematRegs. \p RegionFreq contains the frequency of each region, 0
617+ // / indicating an unknown frequency. Returns whether any rematerializable
618+ // / register was found.
619+ bool collectRematRegs (ArrayRef<uint64_t > RegionFreq);
620+
621+ // / Rematerializes \p Remat. This removes the rematerialized register from
622+ // / live-in/out lists in the DAG and updates RP targets in all affected
623+ // / regions, which are also marked in \ref RescheduleRegions. Regions in which
624+ // / RP savings are not guaranteed are set in \p RecomputeRP. Returns the newly
625+ // / created MI.
626+ MachineInstr *rematerialize (const RematReg &Remat, BitVector &RecomputeRP);
627+
628+ // / Rolls back rematerialization \p Rollback.
629+ void rollback (const RollbackReg &Rollback) const ;
485630
486631 // / Whether the MI is rematerializable
487632 bool isReMaterializable (const MachineInstr &MI);
488633
489- // / Rematerializes all instructions in PreRARematStage::Rematerializations
490- // / and stores the achieved occupancy after remat in
491- // / PreRARematStage::AchievedOcc.
492- void rematerialize ();
493-
494634 // / If remat alone did not increase occupancy to the target one, rolls back all
495635 // / rematerializations and resets live-ins/RP in all regions impacted by the
496636 // / stage to their pre-stage values.
497637 void finalizeGCNSchedStage () override ;
498638
639+ #if !defined(NDEBUG) || defined(LLVM_ENABLE_DUMP)
640+ void printTargetRegions (bool PrintAll = false ) const ;
641+ #endif
499642public:
500643 bool initGCNSchedStage () override ;
501644
@@ -504,7 +647,13 @@ class PreRARematStage : public GCNSchedStage {
504647 bool shouldRevertScheduling (unsigned WavesAfter) override ;
505648
506649 PreRARematStage (GCNSchedStageID StageID, GCNScheduleDAGMILive &DAG)
507- : GCNSchedStage(StageID, DAG), RescheduleRegions(DAG.Regions.size()) {}
650+ : GCNSchedStage(StageID, DAG), TargetRegions(DAG.Regions.size()),
651+ RescheduleRegions (DAG.Regions.size()) {
652+ const unsigned NumRegions = DAG.Regions .size (); // Both bit vectors above are sized with this same region count.
653+ RPTargets.reserve (NumRegions); // Reserve up front for the per-region containers.
654+ RegionBB.reserve (NumRegions);
655+ MIRegion.reserve (MF.getInstructionCount ()); // Presumably an upper bound on the number of MIs mapped to a region -- confirm.
656+ }
508657};
509658
510659class ILPInitialScheduleStage : public GCNSchedStage {
0 commit comments