@@ -287,14 +287,6 @@ class GCNScheduleDAGMILive final : public ScheduleDAGMILive {
287287 // Compute and cache live-ins and pressure for all regions in block.
288288 void computeBlockPressure (unsigned RegionIdx, const MachineBasicBlock *MBB);
289289
290- // / If necessary, updates a region's boundaries following insertion ( \p NewMI
291- // / != nullptr) or removal ( \p NewMI == nullptr) of a \p MI in the region.
292- // / For an MI removal, this must be called before the MI is actually erased
293- // / from its parent MBB.
294- void updateRegionBoundaries (RegionBoundaries &RegionBounds,
295- MachineBasicBlock::iterator MI,
296- MachineInstr *NewMI);
297-
298290 void runSchedStages ();
299291
300292 std::unique_ptr<GCNSchedStage> createSchedStage (GCNSchedStageID SchedStageID);
@@ -462,88 +454,66 @@ class PreRARematStage : public GCNSchedStage {
462454 MachineInstr *DefMI;
463455 // / Single user of the rematerializable register.
464456 MachineInstr *UseMI;
465- // / Using region.
466- unsigned UseRegion;
467457 // / Regions in which the register is live-in/live-out/live anywhere.
468458 BitVector LiveIn, LiveOut, Live;
469459 // / The rematerializable register's lane bitmask.
470460 LaneBitmask Mask;
471- // / Frequency of region defining/using the register. 0 when unknown.
472- unsigned DefFrequency, UseFrequency;
461+ // / Defining and using regions.
462+ unsigned DefRegion, UseRegion;
463+ // / Frequency of defining/using regions. 0 when unknown.
464+ uint64_t DefFrequency, UseFrequency;
473465
474466 RematReg (MachineInstr *DefMI, MachineInstr *UseMI,
475467 GCNScheduleDAGMILive &DAG,
476468 const DenseMap<MachineInstr *, unsigned > &MIRegion,
477469 ArrayRef<uint64_t > RegionFreq);
478470
479- // / Returns whether the regions at which the register is live intersects
480- // / with the \p Target regions.
481- bool intersectWithTarget (BitVector Target) const {
482- Target &= Live;
483- return Target.any ();
484- }
471+ // / Returns the rematerializable register (operand 0 of \ref DefMI). Do not
472+ // / call after deleting the original defining instruction: it reads DefMI.
473+ Register getReg () const { return DefMI->getOperand (0 ).getReg (); }
485474
486- // / Returns whether is is always beneficial to rematerialize this register.
487- // / These are rematerializations that never move instructions into higher
488- // / frequency regions and at least shorten live intervals, so they are
489- // / always useful irrespective of RP targets.
490- bool isAlwaysBeneficial () const {
491- // When the using region is executed a single time, we know
492- // rematerializing will be beneficial whatever the defining region's
493- // frequency.
494- if (UseFrequency == 1 )
495- return true ;
496- // When there is uncertainty on the defining or using frequency, we err on
497- // the conservative side and do not consider the rematerialization always
498- // beneficial.
499- if (!DefFrequency || !UseFrequency)
500- return false ;
501- return UseFrequency <= DefFrequency;
502- }
475+ // / Determines whether this rematerialization may be beneficial in at least
476+ // / one target region.
477+ bool maybeBeneficial (const BitVector &TargetRegions,
478+ ArrayRef<GCNRPTarget> RPTargets) const ;
503479
504- // / Determines whether rematerializing the register is guaranteed to reduce
505- // / pressure in the region.
506- bool isBeneficialRegion (unsigned I) const {
480+ // / Determines if the register is both unused and live-through in region \p
481+ // / I. This guarantees that rematerializing it will reduce RP in the region.
482+ bool isUnusedLiveThrough (unsigned I) const {
507483 assert (I < Live.size () && " region index out of range" );
// Live-through: live-in and live-out of the region. Unused: the register
// has a single user, so only the using region can contain a use of it.
508484 return LiveIn[I] && LiveOut[I] && I != UseRegion;
509485 }
510486
511- // / Determines whether rematerializing the register can but is not
512- // / guaranteed to reduce pressure in the region.
513- bool isMaybeBeneficialRegion (unsigned I) const {
514- assert (I < Live.size () && " region index out of range" );
515- return Live[I] && !isBeneficialRegion (I);
516- }
517-
518487 // / Updates internal structures following a MI rematerialization. Part of
519488 // / the stage instead of the DAG because it makes assumptions that are
520489 // / specific to the rematerialization process.
521- MachineInstr *insertMI (unsigned RegionIdx,
522- MachineBasicBlock::iterator InsertPos,
523- GCNScheduleDAGMILive &DAG) const ;
490+ void insertMI (unsigned RegionIdx, MachineInstr *RematMI,
491+ GCNScheduleDAGMILive &DAG) const ;
524492
525493#if !defined(NDEBUG) || defined(LLVM_ENABLE_DUMP)
526- void print (const DenseMap<MachineInstr *, unsigned > &MIRegion ) const ;
494+ void print () const ;
527495#endif
528496 };
529497
530498 // / A scored rematerializable register. Higher scores indicate more beneficial
531- // / rematerializations. Non-positive scores indicate the rematerialization is
499+ // / rematerializations. A null score indicates the rematerialization is
532500 // / not helpful to reduce RP in target regions.
533501 struct ScoredRemat {
534502 // / The rematerializable register under consideration.
535503 const RematReg *Remat;
536504
537505 // / This only initializes state-independent characteristics of \p Remat, not
538506 // / the actual score.
539- ScoredRemat (const RematReg *Remat, const GCNScheduleDAGMILive &DAG);
507+ ScoredRemat (const RematReg *Remat, uint64_t MinFreq, uint64_t MaxFreq,
508+ const GCNScheduleDAGMILive &DAG);
540509
541510 // / Updates the rematerialization's score w.r.t. the current \p RPTargets.
542511 // / \p RegionFreq indicates the frequency of each region
543512 void update (const BitVector &TargetRegions, ArrayRef<GCNRPTarget> RPTargets,
544513 ArrayRef<uint64_t > RegionFreq, bool ReduceSpill);
545514
546- int getScore () const { return Score; }
515+ // / Returns whether the current score is null, i.e., equal to zero.
516+ bool hasNullScore () const { return !Score; }
547517
548518 bool operator <(const ScoredRemat &O) const {
549519 // Break ties using pointer to rematerializable register. Since
@@ -554,49 +524,68 @@ class PreRARematStage : public GCNSchedStage {
554524 return Score < O.Score ;
555525 }
556526
527+ #if !defined(NDEBUG) || defined(LLVM_ENABLE_DUMP)
528+ void print () const ;
529+ #endif
530+
557531 private:
558- // / Per-region contribution weights to RP score depending on whether RP is
559- // / guaranteed or only likely to be reduced in the region. Only their
560- // / relative value w.r.t. one another matter.
561- static constexpr int WeightRP = 2 , WeightRPMaybe = 1 ;
532+ // / Bitwidths for score components.
533+ static constexpr unsigned MaxFreqWidth = 32 , FreqDiffWidth = 16 ,
534+ RegionImpactWidth = 16 ;
562535
563536 // / Number of 32-bit registers this rematerialization covers.
564537 const unsigned NumRegs;
565- // / Latency gain induced by rematerializing the register over spilling its
566- // / defining instruction.
567- const int RematLatencyGainOverSpill;
568-
569- // / Whether we can estimate the latency gain of rematerialazing over
570- // / spilling; this requires knowing defining/using region frequencies.
571- bool hasUnknownLatencyGain () const {
572- return !Remat->DefFrequency || !Remat->UseFrequency ;
573- }
538+ // / Frequency difference between defining and using regions, normalized to
539+ // / the maximum possible difference and rescaled to the representable range
540+ // / in the score.
541+ const uint64_t FreqDiff;
574542
575- using ScoreTy = int32_t ;
543+ using ScoreTy = uint64_t ;
576544 // / Overall rematerialization score. Scoring components are mapped to bit
577545 // / ranges in the overall score.
578546 // /
579- // / [31:1] : estimated RP reduction score
580- // / [0] : known latency gain
581- ScoreTy Score;
547+ // / [63:32] : maximum frequency in benefiting target region (spilling only)
548+ // / [31:16] : frequency difference between defining and using region
549+ // / [15: 0] : number of benefiting regions times register size
550+ ScoreTy Score = 0 ;
582551
583- void resetScore () { Score = 0 ; }
// / Resets the score to null, i.e., clears all of its components at once.
552+ void setNullScore () { Score = 0 ; }
584553
585- void setUselessRemat () { Score = std::numeric_limits<ScoreTy>::min (); }
// / ORs \p MaxFreq into score bits [63:32], i.e., above the frequency
// / difference and region impact components. The value is first saturated
// / to the largest 32-bit unsigned value so that it fits in that range. Bits
// / already set in the range are preserved, not overwritten.
554+ void setMaxFreqScore (ScoreTy MaxFreq) {
555+ MaxFreq = std::min (
556+ static_cast <ScoreTy>(std::numeric_limits<uint32_t >::max ()), MaxFreq);
557+ Score |= MaxFreq << (FreqDiffWidth + RegionImpactWidth);
558+ }
586559
587- void setKnownLatencyGain () { Score |= 1 ; }
// / ORs \p FreqDiff into score bits [31:16], i.e., just above the region
// / impact component. The value is first saturated to the largest 16-bit
// / unsigned value so that it cannot spill into the higher component. Bits
// / already set in the range are preserved, not overwritten. Note that the
// / parameter shadows the \ref FreqDiff member inside this function.
560+ void setFreqDiffScore (ScoreTy FreqDiff) {
561+ FreqDiff = std::min (
562+ static_cast <ScoreTy>(std::numeric_limits<uint16_t >::max ()), FreqDiff);
563+ Score |= FreqDiff << RegionImpactWidth;
564+ }
588565
589- void setRPScore (ScoreTy RPScore) { Score |= RPScore << 1 ; }
// / ORs \p RegionImpact into the lowest score bits [15:0]. The value is first
// / saturated to the largest 16-bit unsigned value so that it cannot spill
// / into the higher components. Bits already set in the range are preserved,
// / not overwritten.
566+ void setRegionImpactScore (ScoreTy RegionImpact) {
567+ RegionImpact =
568+ std::min (static_cast <ScoreTy>(std::numeric_limits<uint16_t >::max ()),
569+ RegionImpact);
570+ Score |= RegionImpact;
571+ }
590572
591573 unsigned getNumRegs (const GCNScheduleDAGMILive &DAG) const ;
592574
593- unsigned getLatencyGain (const GCNScheduleDAGMILive &DAG) const ;
575+ uint64_t getFreqDiff (uint64_t MinFreq, uint64_t MaxFreq) const ;
576+ };
577+
578+ // / Holds enough information to roll back a rematerialization decision post
579+ // / re-scheduling.
580+ struct RollbackInfo {
581+ // / The rematerializable register under consideration.
582+ const RematReg *Remat;
583+ // / The rematerialized MI replacing the original defining MI. Null until it
// / is assigned; the constructor only records the register.
584+ MachineInstr *RematMI = nullptr;
585+
// / Creates rollback information for \p Remat, leaving \ref RematMI null.
586+ RollbackInfo (const RematReg *Remat) : Remat(Remat) {}
587+ };
595588
596- // / Maps all MIs (except lone terminators, which are not part of any region)
597- // / to their parent region. Non-lone terminators are considered part of the
598- // / region they delimitate.
599- DenseMap<MachineInstr *, unsigned > MIRegion;
600589 // / Parent MBB to each region, in region order.
601590 SmallVector<MachineBasicBlock *> RegionBB;
602591
@@ -613,22 +602,18 @@ class PreRARematStage : public GCNSchedStage {
613602
614603 // / List of rematerializable registers.
615604 SmallVector<RematReg, 16 > RematRegs;
616-
617- using RollbackReg = std::pair<MachineInstr *, const RematReg *>;
618605 // / List of rematerializations to roll back if rematerialization does not end
619- // / up being beneficial. Each element pairs the MI created during
620- // / rematerialization to the original rematerializable register.
621- SmallVector<RollbackReg> Rollbackable;
622-
606+ // / up being beneficial.
607+ SmallVector<RollbackInfo> Rollbacks;
623608 // / After successful stage initialization, indicates which regions should be
624609 // / rescheduled.
625610 BitVector RescheduleRegions;
626611
627612 // / Determines the stage's objective (increasing occupancy or reducing
628613 // / spilling, set in \ref TargetOcc). Defines \ref RPTargets in all regions to
629614 // / achieve that objective and marks those that don't achieve it in \ref
630- // / TargetRegions.
631- void setObjective ();
615+ // / TargetRegions. Returns whether there is any target region.
616+ bool setObjective ();
632617
633618 // / Unsets target regions in \p Regions whose RP target has been reached.
634619 void unsetSatisifedRPTargets (const BitVector &Regions);
@@ -639,20 +624,26 @@ class PreRARematStage : public GCNSchedStage {
639624 bool updateAndVerifyRPTargets (const BitVector &Regions);
640625
641626 // / Collects all rematerializable registers and appends them to \ref
642- // / RematRegs. \p RegionFreq contains the frequency of each region, 0
643- // / indicating an unknown frequency. Returns whether any rematerializable
644- // / register was found.
645- bool collectRematRegs (ArrayRef<uint64_t > RegionFreq);
627+ // / RematRegs. \p MIRegion maps MIs to their region and \p RegionFreq contains
628+ // / the frequency of each region, 0 indicating an unknown frequency. Returns
629+ // / whether any rematerializable register was found.
630+ bool collectRematRegs (const DenseMap<MachineInstr *, unsigned > &MIRegion,
631+ ArrayRef<uint64_t > RegionFreq);
646632
647633 // / Rematerializes \p Remat. This removes the rematerialized register from
648634 // / live-in/out lists in the DAG and updates RP targets in all affected
649635 // / regions, which are also marked in \ref RescheduleRegions. Regions in which
650- // / RP savings are not guaranteed are set in \p RecomputeRP. Returns the newly
651- // / created MI.
652- MachineInstr *rematerialize (const RematReg &Remat, BitVector &RecomputeRP);
653-
654- // / Rollbacks rematerialization \p Rollback.
655- void rollback (const RollbackReg &Rollback) const ;
636+ // / RP savings are not guaranteed are set in \p RecomputeRP. When \p Rollback
637+ // / is non-null, fills it with the information required to roll back the
638+ // / rematerialization post-rescheduling.
639+ void rematerialize (const RematReg &Remat, BitVector &RecomputeRP,
640+ RollbackInfo *Rollback);
641+
642+ // / Rolls back the rematerialization decision represented by \p Rollback. This
643+ // / updates live-in/out lists in the DAG but does not update cached register
644+ // / pressures. Regions in which RP may be impacted are marked in \p
645+ // / RecomputeRP.
646+ void rollback (const RollbackInfo &Rollback, BitVector &RecomputeRP) const ;
656647
657648 // / Whether the MI is rematerializable
658649 bool isReMaterializable (const MachineInstr &MI);
@@ -678,7 +669,6 @@ class PreRARematStage : public GCNSchedStage {
678669 const unsigned NumRegions = DAG.Regions .size ();
679670 RPTargets.reserve (NumRegions);
680671 RegionBB.reserve (NumRegions);
681- MIRegion.reserve (MF.getInstructionCount ());
682672 }
683673};
684674
0 commit comments