Skip to content
Merged
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
12 changes: 7 additions & 5 deletions llvm/lib/Target/AMDGPU/SIMachineFunctionInfo.cpp
Original file line number Diff line number Diff line change
@@ -254,8 +254,8 @@ Register SIMachineFunctionInfo::addLDSKernelId() {
SmallVectorImpl<MCRegister> *SIMachineFunctionInfo::addPreloadedKernArg(
const SIRegisterInfo &TRI, const TargetRegisterClass *RC,
unsigned AllocSizeDWord, int KernArgIdx, int PaddingSGPRs) {
assert(!ArgInfo.PreloadKernArgs.count(KernArgIdx) &&
"Preload kernel argument allocated twice.");
auto [It, Inserted] = ArgInfo.PreloadKernArgs.try_emplace(KernArgIdx);
assert(Inserted && "Preload kernel argument allocated twice.");
NumUserSGPRs += PaddingSGPRs;
// If the available register tuples are aligned with the kernarg to be
// preloaded use that register, otherwise we need to use a set of SGPRs and
@@ -264,20 +264,22 @@ SmallVectorImpl<MCRegister> *SIMachineFunctionInfo::addPreloadedKernArg(
ArgInfo.FirstKernArgPreloadReg = getNextUserSGPR();
Register PreloadReg =
TRI.getMatchingSuperReg(getNextUserSGPR(), AMDGPU::sub0, RC);
auto &Regs = It->second.Regs;
if (PreloadReg &&
(RC == &AMDGPU::SReg_32RegClass || RC == &AMDGPU::SReg_64RegClass)) {
ArgInfo.PreloadKernArgs[KernArgIdx].Regs.push_back(PreloadReg);
Regs.push_back(PreloadReg);
NumUserSGPRs += AllocSizeDWord;
} else {
Regs.reserve(AllocSizeDWord);
for (unsigned I = 0; I < AllocSizeDWord; ++I) {
ArgInfo.PreloadKernArgs[KernArgIdx].Regs.push_back(getNextUserSGPR());
Regs.push_back(getNextUserSGPR());
NumUserSGPRs++;
}
}

// Track the actual number of SGPRs that HW will preload to.
UserSGPRInfo.allocKernargPreloadSGPRs(AllocSizeDWord + PaddingSGPRs);
return &ArgInfo.PreloadKernArgs[KernArgIdx].Regs;
return &Regs;
}

void SIMachineFunctionInfo::allocateWWMSpill(MachineFunction &MF, Register VGPR,
Expand Down