@@ -648,9 +648,6 @@ LLVM_DUMP_METHOD void StackColoring::dumpIntervals() const {
648648 dbgs () << ' ' << SIdx;
649649 dbgs () << ' \n ' ;
650650 }
651- for (unsigned Slot = 0 ; Slot < Slot2Info.size (); Slot++) {
652- Slot2Info[Slot].dump (this );
653- }
654651}
655652
656653LLVM_DUMP_METHOD void StackColoring::SlotInfo::dump (const StackColoring* State) const {
@@ -1060,12 +1057,6 @@ void StackColoring::calculateLiveIntervals(unsigned NumSlots) {
10601057 });
10611058 }
10621059 }
1063- LLVM_DEBUG ({
1064- for (SplitSlotChanges C : MidBlockSpillChanges) {
1065- dbgs () << " Idx=" << C.BlockIdx << " Slot=" << C.Slot
1066- << " IsStart=" << C.IsStart << " MI=" << *C.AtMI ;
1067- }
1068- });
10691060
10701061 // To avoid needing bounds checks
10711062 MidBlockSpillChanges.push_back ({nullptr , 0 , false , InvalidIdx});
@@ -1583,17 +1574,14 @@ unsigned StackColoring::doMerging(unsigned NumSlots) {
15831574 }
15841575 std::stable_sort (
15851576 SlotStack.begin (), SlotStack.end (), [&](unsigned Lhs, unsigned Rhs) {
1586- if (Lhs == InvalidIdx)
1587- return false ;
1588- if (Rhs == InvalidIdx)
1589- return true ;
15901577 return Slot2Info[Lhs].SlotPriority < Slot2Info[Rhs].SlotPriority ;
15911578 });
15921579 }
15931580
15941581 SlotInfo* LastQueryLhs = nullptr ;
15951582 SlotInfo* LastQueryRhs = nullptr ;
15961583 bool LastQueryRes = false ;
1584+ // TODO: Real caching ?
15971585 auto HasOverlapCached = [&](SlotInfo &Lhs, SlotInfo &Rhs) {
15981586 if (&Lhs == LastQueryLhs && LastQueryRhs == &Rhs)
15991587 return LastQueryRes;
@@ -1604,8 +1592,14 @@ unsigned StackColoring::doMerging(unsigned NumSlots) {
16041592 };
16051593
16061594 struct Status {
1595+ // This is the offset at which a slot on top should be placed. So the offset
1596+ // of the slot + the size of the slot
16071597 unsigned Offset = 0 ;
1598+
1599+ // The Slot just below the offset.
16081600 unsigned Slot = InvalidIdx;
1601+
1602+ // The index of the previous status in OlderStatus
16091603 unsigned Prev = InvalidIdx;
16101604 };
16111605
@@ -1616,22 +1610,41 @@ unsigned StackColoring::doMerging(unsigned NumSlots) {
16161610 auto FindOffset = [&](SlotInfo &Info, unsigned Pt) {
16171611 Status *Last = &LatestStatus[Pt];
16181612
1619- // This is only called on Slot that have overlapping lifetimes
1620- // So the no overlap only happens when there lifetime overlap but only one
1621- // can be live because where they start in the CFG is mutually exclusive
1622- // See the comment about implementation for an example
1613+ // The slots in the linked-list are always kept in ascending order, so the
1614+ // earliest slot has the lowest offset
1615+ // This loop handles cases where the latest slot doesn't cannot be both live
1616+ // because of the CFG, so even if there lifetime overlap, they can overlap
16231617 while (LLVM_UNLIKELY (Last->Slot != InvalidIdx &&
16241618 !HasOverlapCached (Info, Slot2Info[Last->Slot ])))
16251619 Last = &OlderStatus[Last->Prev ];
16261620 return Last->Offset ;
16271621 };
16281622 auto UpdateOffset = [&](SlotInfo &Info, unsigned Pt, unsigned Offset) {
1629- Status& Last = LatestStatus[Pt];
1623+ Status* Last = & LatestStatus[Pt];
16301624 unsigned Idx = OlderStatus.size ();
1631- OlderStatus.push_back (Last);
1632- Last.Prev = Idx;
1633- Last.Offset = Offset;
1634- Last.Slot = &Info - Slot2Info.data ();
1625+ OlderStatus.push_back (*Last);
1626+
1627+ // this is branch is not taken only when we are inserting a slot that wasn't
1628+ // overlapping with the previous slot and is smaller. so the slot inserted
1629+ // slot is not the new start of the linked-list
1630+ if (LLVM_LIKELY (Last->Offset <= Offset)) {
1631+ Last->Prev = Idx;
1632+ Last->Offset = Offset;
1633+ Last->Slot = &Info - Slot2Info.data ();
1634+ return ;
1635+ }
1636+
1637+ // Insure ordering of slots
1638+ Status* Inserted = &OlderStatus.back ();
1639+ Inserted->Offset = Offset;
1640+ Inserted->Slot = &Info - Slot2Info.data ();
1641+ Status *Curr = Last;
1642+ while (Curr->Prev != InvalidIdx && OlderStatus[Curr->Prev ].Offset > Offset)
1643+ Curr = &OlderStatus[Curr->Prev ];
1644+
1645+ // Insert the new node in the linked-list
1646+ Inserted->Prev = Curr->Prev ;
1647+ Curr->Prev = Idx;
16351648 };
16361649
16371650 SmallVector<unsigned , MaxCandidatesToConsiderDefault> Candidates;
@@ -1643,25 +1656,34 @@ unsigned StackColoring::doMerging(unsigned NumSlots) {
16431656 Candidates.push_back (SlotStack.pop_back_val ());
16441657 }
16451658
1659+ unsigned WorseCaseOffset = 0 ;
16461660 while (!Candidates.empty ()) {
1647- int64_t BestScore = std::numeric_limits<int64_t >::max ();
16481661 unsigned BestIdx = InvalidIdx;
16491662 unsigned BestOffset = InvalidIdx;
16501663
16511664 for (unsigned K = 0 ; K < Candidates.size (); K++) {
16521665 SlotInfo &Info = Slot2Info[Candidates[K]];
16531666 unsigned Offset = 0 ;
1654- for (unsigned Pt : Info.Liveness .set_bits ())
1667+ for (unsigned Pt : Info.Liveness .set_bits ()) {
16551668 Offset = std::max (Offset, FindOffset (Info, Pt));
16561669
1670+ // If Offset == WorseCaseOffset, this is always a valid, options. so no
1671+ // more checking needed
1672+ // If Offset > BestOffset, we already found a better solution, so this
1673+ // one doesn't matter
1674+ if (Offset == WorseCaseOffset || Offset > BestOffset)
1675+ break ;
1676+ }
1677+
16571678 Offset = alignTo (Offset, Info.Align );
16581679
1659- int64_t Score = (int64_t )Offset - (int64_t )Log2 (Info.Align );
1660- LLVM_DEBUG (dbgs () << " SlotInfo(" << Candidates[K] << " ) Score=" << Score << " \n " );
1680+ LLVM_DEBUG (dbgs () << " choice: SlotInfo(" << Candidates[K] << " ) at " << Offset << " \n " );
16611681 bool IsBetter = [&] {
1662- if (BestScore != Score )
1663- return BestScore > Score ;
1682+ if (BestOffset != Offset )
1683+ return BestOffset > Offset ;
16641684 SlotInfo &Other = Slot2Info[Candidates[K]];
1685+ if (Other.Align != Info.Align )
1686+ return Other.Align < Info.Align ;
16651687 if (Other.Size != Info.Size )
16661688 return Other.Size < Info.Size ;
16671689 if (Other.SlotPriority != Info.SlotPriority )
@@ -1672,7 +1694,6 @@ unsigned StackColoring::doMerging(unsigned NumSlots) {
16721694 }();
16731695
16741696 if (IsBetter) {
1675- BestScore = Score;
16761697 BestIdx = K;
16771698 BestOffset = Offset;
16781699 }
@@ -1681,11 +1702,24 @@ unsigned StackColoring::doMerging(unsigned NumSlots) {
16811702
16821703 LLVM_DEBUG (Info.dump (this ));
16831704 LLVM_DEBUG (dbgs () << " Placing SlotInfo(" << Candidates[BestIdx] << " ) at "
1684- << BestOffset << " Score= " << BestScore << " \n " );
1705+ << BestOffset << " \n " );
16851706
16861707 Info.Offset = BestOffset;
1708+ WorseCaseOffset = std::max (WorseCaseOffset, BestOffset + Info.Size );
16871709 for (unsigned Pt : Info.Liveness .set_bits ())
16881710 UpdateOffset (Info, Pt, BestOffset + Info.Size );
1711+ #ifdef EXPENSIVE_CHECKS
1712+ // Validate the order of offsets in the linked-list
1713+ for (Status &S : LatestStatus) {
1714+ Status *Curr = &S;
1715+ unsigned CurrOffset = Curr->Offset ;
1716+ while (Curr->Prev != InvalidIdx) {
1717+ assert (Curr->Offset <= CurrOffset);
1718+ CurrOffset = Curr->Offset ;
1719+ Curr = &OlderStatus[Curr->Prev ];
1720+ }
1721+ }
1722+ #endif
16891723
16901724 std::swap (Candidates[BestIdx], Candidates.back ());
16911725 Candidates.pop_back ();
@@ -1788,14 +1822,14 @@ bool StackColoring::run(MachineFunction &Func) {
17881822
17891823 // Propagate the liveness information.
17901824 calculateLiveIntervals (NumSlots);
1791- LLVM_DEBUG (dumpIntervals ());
17921825
17931826 // Search for allocas which are used outside of the declared lifetime
17941827 // markers.
17951828 if (ProtectFromEscapedAllocas)
17961829 removeInvalidSlotRanges ();
17971830
17981831 if (!UseNewStackColoring) {
1832+ LLVM_DEBUG (dumpIntervals ());
17991833 // Maps old slots to new slots.
18001834 DenseMap<int , int > SlotRemap;
18011835 unsigned RemovedSlots = 0 ;
0 commit comments