Skip to content

Commit b55af4e

Browse files
committed
[AMDGPU][Next Use Analysis] Use inside the loop NUD penalty. Bugfixing and refactoring.
Implemented a NUD penalty for uses inside the loop body. If a register used in a loop is selected for spilling, it will be reloaded in the loop pre-header. NUDs for such reg/mask pairs are cut on the pre-header-to-header edge. This makes the NUDs shorter and the reg/mask pair less attractive as a spill candidate. Bug fixes: 1. Fixed incorrect logic in insert/merge: lane-mask coverage is now used instead of an exact mask match. 2. Fixed the precedence of a full-register use over a sub-register use. Refactoring: insert now incorporates the coverage and full-over-subreg precedence logic, and merge uses insert.
1 parent 75a02b4 commit b55af4e

20 files changed

+10552
-12213
lines changed

llvm/lib/Target/AMDGPU/AMDGPUNextUseAnalysis.cpp

Lines changed: 17 additions & 13 deletions
Original file line numberDiff line numberDiff line change
@@ -114,30 +114,34 @@ void NextUseResult::analyze(const MachineFunction &MF) {
114114
}
115115

116116
if (LI->getLoopDepth(MBB) < LI->getLoopDepth(Succ)) {
117-
// MBB->Succ is entering the Succ's loop
118-
// Clear out the Loop-Exiting weights.
117+
// MBB->Succ is entering the Succ's loop (analysis exiting the loop)
118+
// Two transformations:
119+
// 1. Outside-loop uses (>= LoopTag): subtract LoopTag
120+
// 2. Inside-loop uses (< LoopTag): reset to preheader position
121+
// This models: if spilled before loop, reload at preheader
119122
for (auto &P : SuccDist) {
120123
auto &Dists = P.second;
121-
// Collect items that need to be updated to avoid iterator
122-
// invalidation
123-
SmallVector<std::pair<LaneBitmask, int64_t>, 4> ToUpdate;
124+
VRegDistances::SortedRecords NewDists;
124125
for (auto R : Dists) {
125126
if (R.second >= LoopTag) {
126-
ToUpdate.push_back(R);
127+
// Outside-loop use: subtract LoopTag
128+
R.second -= LoopTag;
129+
} else {
130+
// Inside-loop use: reset so distance = 0 at preheader bottom
131+
R.second = -(int64_t)EntryOff[SuccNum];
127132
}
133+
NewDists.insert(R);
128134
}
129-
// Now apply the updates
130-
for (auto R : ToUpdate) {
131-
Dists.erase(R);
132-
R.second -= LoopTag;
133-
Dists.insert(R);
134-
}
135+
Dists = std::move(NewDists);
135136
}
136137
}
137138
LLVM_DEBUG({
138139
dbgs() << "\nCurr:";
139140
printVregDistances(Curr /*, 0 - we're at the block bottom*/);
140-
dbgs() << "\nSucc:";
141+
if (EdgeWeight != 0)
142+
dbgs() << "\nSucc (EdgeWeight " << EdgeWeight << " applied):";
143+
else
144+
dbgs() << "\nSucc:";
141145
printVregDistances(SuccDist, EntryOff[SuccNum], EdgeWeight);
142146
});
143147

llvm/lib/Target/AMDGPU/AMDGPUNextUseAnalysis.h

Lines changed: 28 additions & 21 deletions
Original file line numberDiff line numberDiff line change
@@ -90,6 +90,27 @@ class NextUseResult {
9090

9191
bool contains(unsigned Key) { return NextUseMap.contains(Key); }
9292

93+
// Compare two stored distances: returns true if A is closer or equal to B.
94+
// Handles mixed-sign values correctly:
95+
// - Negative stored values (finite distances): larger (less negative) =
96+
// closer
97+
// - Non-negative stored values (LoopTag distances): smaller = closer
98+
// - Mixed: negative (finite) is always closer than non-negative
99+
// (loop-tagged)
100+
// TODO: Investigate making LoopTag/DeadTag negative for consistent sign
101+
// convention
102+
static bool isCloserOrEqual(int64_t A, int64_t B) {
103+
// Both negative (finite): larger = closer
104+
if (A < 0 && B < 0)
105+
return A >= B;
106+
// Both non-negative (loop-tagged): smaller = closer
107+
if (A >= 0 && B >= 0)
108+
return A <= B;
109+
// Mixed: negative (finite) is always closer than non-negative
110+
// (loop-tagged)
111+
return A < 0;
112+
}
113+
93114
bool insert(VRegMaskPair VMP, int64_t Dist) {
94115
Record R(VMP.getLaneMask(), Dist);
95116
if (NextUseMap.contains(VMP.getVReg())) {
@@ -103,8 +124,8 @@ class NextUseResult {
103124

104125
// Check if existing use covers the new use
105126
if ((R.first & D.first) == R.first) {
106-
// Existing use covers new use
107-
if (D.second <= R.second) {
127+
// Existing use covers new use - keep if existing is closer
128+
if (isCloserOrEqual(D.second, R.second)) {
108129
// Existing use is closer or equal → reject new use
109130
return false;
110131
}
@@ -113,8 +134,8 @@ class NextUseResult {
113134

114135
// Check if new use covers existing use
115136
if ((D.first & R.first) == D.first) {
116-
// New use covers existing use
117-
if (R.second <= D.second) {
137+
// New use covers existing use - evict if new is closer
138+
if (isCloserOrEqual(R.second, D.second)) {
118139
// New use is closer → mark existing for removal
119140
ToErase.push_back(It);
120141
} else {
@@ -186,31 +207,17 @@ class NextUseResult {
186207
}
187208

188209
// Adjust 'Other' (which is in successor's frame) into *this* frame,
189-
// then take pointwise min by LaneBitmask.
210+
// then merge using insert's coverage logic.
190211
void merge(const VRegDistances &Other, unsigned SuccEntryOff,
191212
int64_t EdgeWeight = 0) {
192213
for (const auto &P : Other) {
193214
unsigned Key = P.getFirst();
194215
const auto &OtherDists = P.getSecond();
195-
auto &MineDists = NextUseMap[Key]; // creates empty if not present
196216

197217
for (const auto &D : OtherDists) {
198-
// D.second is the successor's STORED value (signed, relative to succ)
199218
int64_t Rebased = rebaseFromSucc(D.second, SuccEntryOff, EdgeWeight);
200-
201-
// Try to find existing record with the same LaneBitmask
202-
auto It =
203-
std::find_if(MineDists.begin(), MineDists.end(),
204-
[&](const Record &R) { return R.first == D.first; });
205-
206-
if (It == MineDists.end()) {
207-
// No record → insert
208-
MineDists.insert({D.first, Rebased});
209-
} else if (It->second > Rebased) { // take MIN in the current frame
210-
// Furthest wins (adjusted is more distant) → replace
211-
MineDists.erase(It);
212-
MineDists.insert({D.first, Rebased});
213-
}
219+
// Use insert's coverage logic for consistent handling
220+
insert(VRegMaskPair(Register(Key), D.first), Rebased);
214221
}
215222
}
216223
}

0 commit comments

Comments
 (0)