Skip to content
Merged
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
18 changes: 9 additions & 9 deletions llvm/lib/Target/AMDGPU/SIInsertWaitcnts.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -151,14 +151,14 @@ enum RegisterMapping {
SQ_MAX_PGM_VGPRS = 1024, // Maximum programmable VGPRs across all targets.
AGPR_OFFSET = 512, // Maximum programmable ArchVGPRs across all targets.
SQ_MAX_PGM_SGPRS = 128, // Maximum programmable SGPRs across all targets.
NUM_EXTRA_VGPRS = 9, // Reserved slots for DS.
// Artificial register slots to track LDS writes into specific LDS locations
// if a location is known. When slots are exhausted or location is
// unknown use the first slot. The first slot is also always updated in
// addition to known location's slot to properly generate waits if dependent
// instruction's location is unknown.
EXTRA_VGPR_LDS = 0,
NUM_ALL_VGPRS = SQ_MAX_PGM_VGPRS + NUM_EXTRA_VGPRS, // Where SGPR starts.
FIRST_LDS_VGPR = SQ_MAX_PGM_VGPRS, // Extra slots for LDS stores.
NUM_LDS_VGPRS = 9, // One more than the stores we track.
NUM_ALL_VGPRS = SQ_MAX_PGM_VGPRS + NUM_LDS_VGPRS, // Where SGPRs start.
};

// Enumerate different types of result-returning VMEM operations. Although
Expand Down Expand Up @@ -488,7 +488,7 @@ class WaitcntBrackets {
unsigned char VgprVmemTypes[NUM_ALL_VGPRS] = {0};
// Store representative LDS DMA operations. The only useful info here is
// alias info. One store is kept per unique AAInfo.
SmallVector<const MachineInstr *, NUM_EXTRA_VGPRS - 1> LDSDMAStores;
SmallVector<const MachineInstr *, NUM_LDS_VGPRS - 1> LDSDMAStores;
};

// This abstracts the logic for generating and updating S_WAIT* instructions
Expand Down Expand Up @@ -1062,15 +1062,15 @@ void WaitcntBrackets::updateByEvent(const SIInstrInfo *TII,
}
}
}
if (Slot || LDSDMAStores.size() == NUM_EXTRA_VGPRS - 1)
if (Slot || LDSDMAStores.size() == NUM_LDS_VGPRS - 1)
break;
LDSDMAStores.push_back(&Inst);
Slot = LDSDMAStores.size();
break;
}
setRegScore(SQ_MAX_PGM_VGPRS + EXTRA_VGPR_LDS + Slot, T, CurrScore);
setRegScore(FIRST_LDS_VGPR + Slot, T, CurrScore);
if (Slot)
setRegScore(SQ_MAX_PGM_VGPRS + EXTRA_VGPR_LDS, T, CurrScore);
setRegScore(FIRST_LDS_VGPR, T, CurrScore);
}
}
}
Expand Down Expand Up @@ -1122,7 +1122,7 @@ void WaitcntBrackets::print(raw_ostream &OS) const {
if (RegScore <= LB)
continue;
unsigned RelScore = RegScore - LB - 1;
if (J < SQ_MAX_PGM_VGPRS + EXTRA_VGPR_LDS) {
if (J < FIRST_LDS_VGPR) {
OS << RelScore << ":v" << J << " ";
} else {
OS << RelScore << ":ds ";
Expand Down Expand Up @@ -1914,7 +1914,7 @@ bool SIInsertWaitcnts::generateWaitcntInstBefore(MachineInstr &MI,
continue;

// LOAD_CNT is only relevant to vgpr or LDS.
unsigned RegNo = SQ_MAX_PGM_VGPRS + EXTRA_VGPR_LDS;
unsigned RegNo = FIRST_LDS_VGPR;
// Only objects with alias scope info were added to LDSDMAStores array.
// In the absence of the scope info we will not be able to disambiguate
// aliasing here. There is no need to try searching for a corresponding
Expand Down