3535#include " llvm/CodeGen/PseudoSourceValueManager.h"
3636#include " llvm/CodeGen/SlotIndexes.h"
3737#include " llvm/CodeGen/TargetOpcodes.h"
38+ #include " llvm/CodeGen/TargetSubtargetInfo.h"
3839#include " llvm/CodeGen/WinEHFuncInfo.h"
3940#include " llvm/Config/llvm-config.h"
4041#include " llvm/IR/Constants.h"
@@ -99,7 +100,6 @@ static cl::opt<unsigned> MaxCandidatesOpt(
99100 " Max number of candidates that will be evaluated, 0 means no limit" ));
100101
101102STATISTIC (NumMarkerSeen, " Number of lifetime markers found." );
102- STATISTIC (GeneratedWorse, " Number of times worse layout were generated" );
103103STATISTIC (StackSpaceSaved, " Number of bytes saved due to merging slots." );
104104STATISTIC (StackSlotMerged, " Number of stack slot merged." );
105105STATISTIC (EscapedAllocas, " Number of allocas that escaped the lifetime region" );
@@ -400,7 +400,9 @@ class StackColoring {
400400 // Used to make overlap queries faster
401401 SmallVector<unsigned , 4 > StartLiveness;
402402
403- uint64_t SlotPriority = 0 ;
403+ int64_t SlotPriority = 0 ;
404+
405+ unsigned UseCount = 0 ;
404406
405407 unsigned Offset = InvalidIdx;
406408
@@ -653,9 +655,11 @@ StackColoring::SlotInfo::dump(const StackColoring *State) const {
653655 dbgs () << " :" ;
654656 if (Offset != InvalidIdx)
655657 dbgs () << " offset=" << Offset;
658+ dbgs () << " uses=" << UseCount;
659+ dbgs () << " prio=" << SlotPriority;
656660 if (State) {
657661 if (State->MFI ->getObjectAllocation (Slot))
658- dbgs () << " \" " << State->MFI ->getObjectAllocation (Slot)->getName ()
662+ dbgs () << " alloca= \" " << State->MFI ->getObjectAllocation (Slot)->getName ()
659663 << " \" " ;
660664 if (State->MFI ->isSpillSlotObjectIndex (Slot))
661665 dbgs () << " spill" ;
@@ -803,6 +807,7 @@ unsigned StackColoring::collectMarkers(unsigned NumSlot) {
803807 int Slot = MO.getIndex ();
804808 if (Slot < 0 )
805809 continue ;
810+ Slot2Info[Slot].UseCount ++;
806811 if (!BetweenStartEnd.test (Slot)) {
807812 ConservativeSlots.set (Slot);
808813 }
@@ -1525,35 +1530,24 @@ unsigned StackColoring::doMerging(unsigned NumSlots) {
15251530 if (SlotStack.size () <= 1 )
15261531 return InvalidIdx;
15271532
1528- // This Whole block is only used to try and order the stack, such that the
1529- // Slots are processed in an order that helps getting good packing
1530- {
1531- // Find how much usage of every livepoint there is.
1532- SmallVector<unsigned > CumulatedUsage;
1533- CumulatedUsage.resize (LivenessSize, 0 );
1534-
1535- for (unsigned Idx = 0 ; Idx < SlotStack.size (); Idx++) {
1536- SlotInfo &Info = Slot2Info[SlotStack[Idx]];
1537- for (unsigned Pt : Info.Liveness .set_bits ()) {
1538- CumulatedUsage[Pt] += Info.Size ;
1539- }
1540- }
1533+ // This logic is optimized for x86_64; it probably needs to be adapted to
1534+ // other targets to get a good code-size/stack-size balance.
1535+ // It is inspired by X86FrameLowering::orderFrameObjects, but modified to
1536+ // weigh in the alignment, which helps with stack size.
1537+ auto IsLower = [&](unsigned Lhs, unsigned Rhs) {
1538+ SlotInfo &L = Slot2Info[Lhs];
1539+ SlotInfo &R = Slot2Info[Rhs];
1540+ uint64_t DensityLScaled = static_cast <uint64_t >(L.UseCount ) *
1541+ static_cast <uint64_t >(R.Size + Log2 (R.Align ));
1542+ uint64_t DensityRScaled = static_cast <uint64_t >(R.UseCount ) *
1543+ static_cast <uint64_t >(L.Size + Log2 (L.Align ));
1544+ return DensityLScaled < DensityRScaled;
1545+ };
1546+ std::stable_sort (SlotStack.begin (), SlotStack.end (), IsLower);
15411547
1542- for (unsigned Idx = 0 ; Idx < SlotStack.size (); Idx++) {
1543- SlotInfo &Info = Slot2Info[SlotStack[Idx]];
1544- for (unsigned Pt : Info.Liveness .set_bits ()) {
1545- // Since the goal is to minimize the max usage, blocks that are in high
1546- // contention areas are given more priority
1547- Info.SlotPriority +=
1548- (uint64_t )CumulatedUsage[Pt] * (uint64_t )CumulatedUsage[Pt] +
1549- (uint64_t )Info.Size * (uint64_t )Info.Align .value ();
1550- }
1551- }
1552- std::stable_sort (
1553- SlotStack.begin (), SlotStack.end (), [&](unsigned Lhs, unsigned Rhs) {
1554- return Slot2Info[Lhs].SlotPriority < Slot2Info[Rhs].SlotPriority ;
1555- });
1556- }
1548+ int Prio = 0 ;
1549+ for (int Slot : SlotStack)
1550+ Slot2Info[Slot].SlotPriority = Prio++;
15571551
15581552 SlotInfo *LastQueryLhs = nullptr ;
15591553 SlotInfo *LastQueryRhs = nullptr ;
@@ -1666,24 +1660,27 @@ unsigned StackColoring::doMerging(unsigned NumSlots) {
16661660
16671661 Offset = alignTo (Offset, Info.Align );
16681662
1669- LLVM_DEBUG (dbgs () << " fi#" << Candidates[K] << " @" << Offset << " ->" ;
1670- if (PrevSlot == InvalidIdx) dbgs () << " bottom" ;
1671- else dbgs () << " fi#" << PrevSlot; dbgs () << " , " ;);
1663+ LLVM_DEBUG ({
1664+ dbgs () << " fi#" << Candidates[K] << " @" << Offset;
1665+ if (PrevSlot != InvalidIdx)
1666+ dbgs () << " ->" << " fi#" << PrevSlot;
1667+ dbgs () << " , " ;
1668+ });
16721669
16731670 bool IsBetter = [&] {
1671+ if (BestIdx == InvalidIdx)
1672+ return true ;
1673+ SlotInfo &Best = Slot2Info[Candidates[BestIdx]];
16741674 if (BestOffset != Offset)
16751675 return BestOffset > Offset;
1676- SlotInfo &Other = Slot2Info[Candidates[K]];
1677- if (Other.Align != Info.Align )
1678- return Other.Align < Info.Align ;
1679- if (Other.Size != Info.Size )
1680- return Other.Size < Info.Size ;
1681- if (Other.SlotPriority != Info.SlotPriority )
1682- return Other.SlotPriority < Info.SlotPriority ;
1676+ if (Best.SlotPriority != Info.SlotPriority )
1677+ return Best.SlotPriority < Info.SlotPriority ;
1678+ if (Best.Align != Info.Align )
1679+ return Best.Align < Info.Align ;
16831680
16841681 // Both are always stored in Slot2Info, so this is equivalent to
16851682 // FrameIndex comparison
1686- return &Other < &Info;
1683+ return &Best < &Info;
16871684 }();
16881685
16891686 if (IsBetter) {
@@ -1726,7 +1723,6 @@ unsigned StackColoring::doMerging(unsigned NumSlots) {
17261723 LLVM_DEBUG (dbgs () << " MergedSize=" << FinalSize << " OrigPesSize="
17271724 << OrigPesSize << " OrigOptSize" << OrigOptSize << " \n " );
17281725 if (FinalSize >= OrigPesSize) {
1729- GeneratedWorse++;
17301726 return InvalidIdx;
17311727 }
17321728
@@ -1774,6 +1770,7 @@ bool StackColoring::run(MachineFunction &Func) {
17741770 Intervals.reserve (NumSlots);
17751771 LiveStarts.resize (NumSlots);
17761772
1773+ Slot2Info.resize (NumSlots);
17771774 unsigned NumMarkers = collectMarkers (NumSlots);
17781775
17791776 unsigned TotalSize = 0 ;
@@ -1792,7 +1789,6 @@ bool StackColoring::run(MachineFunction &Func) {
17921789 return removeAllMarkers ();
17931790 }
17941791
1795- Slot2Info.resize (NumSlots);
17961792 for (unsigned i = 0 ; i < NumSlots; ++i) {
17971793 std::unique_ptr<LiveRange> LI (new LiveRange ());
17981794 LI->getNextValue (Indexes->getZeroIndex (), VNInfoAllocator);
0 commit comments