@@ -254,8 +254,8 @@ Register SIMachineFunctionInfo::addLDSKernelId() {
 SmallVectorImpl<MCRegister> *SIMachineFunctionInfo::addPreloadedKernArg(
     const SIRegisterInfo &TRI, const TargetRegisterClass *RC,
     unsigned AllocSizeDWord, int KernArgIdx, int PaddingSGPRs) {
-  assert(!ArgInfo.PreloadKernArgs.count(KernArgIdx) &&
-         "Preload kernel argument allocated twice.");
+  auto [It, Inserted] = ArgInfo.PreloadKernArgs.try_emplace(KernArgIdx);
+  assert(Inserted && "Preload kernel argument allocated twice.");
   NumUserSGPRs += PaddingSGPRs;
   // If the available register tuples are aligned with the kernarg to be
   // preloaded use that register, otherwise we need to use a set of SGPRs and
@@ -264,20 +264,22 @@ SmallVectorImpl<MCRegister> *SIMachineFunctionInfo::addPreloadedKernArg(
   ArgInfo.FirstKernArgPreloadReg = getNextUserSGPR();
   Register PreloadReg =
       TRI.getMatchingSuperReg(getNextUserSGPR(), AMDGPU::sub0, RC);
+  auto &Regs = It->second.Regs;
   if (PreloadReg &&
       (RC == &AMDGPU::SReg_32RegClass || RC == &AMDGPU::SReg_64RegClass)) {
-    ArgInfo.PreloadKernArgs[KernArgIdx].Regs.push_back(PreloadReg);
+    Regs.push_back(PreloadReg);
     NumUserSGPRs += AllocSizeDWord;
   } else {
+    Regs.reserve(AllocSizeDWord);
     for (unsigned I = 0; I < AllocSizeDWord; ++I) {
-      ArgInfo.PreloadKernArgs[KernArgIdx].Regs.push_back(getNextUserSGPR());
+      Regs.push_back(getNextUserSGPR());
       NumUserSGPRs++;
     }
   }

   // Track the actual number of SGPRs that HW will preload to.
   UserSGPRInfo.allocKernargPreloadSGPRs(AllocSizeDWord + PaddingSGPRs);
-  return &ArgInfo.PreloadKernArgs[KernArgIdx].Regs;
+  return &Regs;
 }

 void SIMachineFunctionInfo::allocateWWMSpill(MachineFunction &MF, Register VGPR,
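
The idiom this hunk applies is general C++17, not DenseMap-specific: `try_emplace` returns an iterator/bool pair, so a single lookup both detects a prior insertion (replacing the `count()` check) and hands back a reference to the default-constructed value (replacing the repeated `operator[]` lookups). A minimal standalone sketch of the same pattern, using `std::unordered_map` and a hypothetical `KernArgInfo` value type in place of the in-tree types:

```cpp
#include <cassert>
#include <unordered_map>
#include <vector>

// Hypothetical stand-in for the in-tree per-argument record.
struct KernArgInfo {
  std::vector<unsigned> Regs;
};

std::unordered_map<int, KernArgInfo> PreloadKernArgs;

std::vector<unsigned> *addPreloadedKernArg(int KernArgIdx, unsigned NextSGPR) {
  // One hash lookup: try_emplace default-constructs the mapped value only
  // if KernArgIdx was absent, and reports whether it actually inserted.
  auto [It, Inserted] = PreloadKernArgs.try_emplace(KernArgIdx);
  assert(Inserted && "Preload kernel argument allocated twice.");
  // Reuse the iterator instead of re-hashing the key on every access.
  auto &Regs = It->second.Regs;
  Regs.push_back(NextSGPR);
  return &Regs;
}
```

Caching `Regs` through the iterator is safe in the patched function because no further insertions into `PreloadKernArgs` happen before the reference's last use; with a rehashing container such as DenseMap, a later insertion could invalidate it.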