@@ -137,10 +137,10 @@ enum WaitEventType {
137137// We reserve a fixed number of VGPR slots in the scoring tables for
138138// special tokens like SCMEM_LDS (needed for buffer load to LDS).
139139enum RegisterMapping {
140- SQ_MAX_PGM_VGPRS = 512 , // Maximum programmable VGPRs across all targets.
141- AGPR_OFFSET = 256 , // Maximum programmable ArchVGPRs across all targets.
142- SQ_MAX_PGM_SGPRS = 256 , // Maximum programmable SGPRs across all targets.
143- NUM_EXTRA_VGPRS = 9 , // Reserved slots for DS.
140+ SQ_MAX_PGM_VGPRS = 1024 , // Maximum programmable VGPRs across all targets.
141+ AGPR_OFFSET = 512 , // Maximum programmable ArchVGPRs across all targets.
142+ SQ_MAX_PGM_SGPRS = 256 , // Maximum programmable SGPRs across all targets.
143+ NUM_EXTRA_VGPRS = 9 , // Reserved slots for DS.
144144 // Artificial register slots to track LDS writes into specific LDS locations
145145 // if a location is known. When slots are exhausted or location is
146146 // unknown use the first slot. The first slot is also always updated in
@@ -165,6 +165,17 @@ enum VmemType {
165165 NUM_VMEM_TYPES
166166};
167167
168+ static unsigned getRegPoint (MCRegister Reg, const SIRegisterInfo &TRI) {
169+ // Order register interval points so that intervals of 32-bit VGPRs
170+ // include intervals of their 16-bit halves.
171+ MCRegister MCReg = AMDGPU::getMCReg (Reg, TRI.getSubtarget ());
172+ unsigned RegIdx = TRI.getHWRegIndex (MCReg);
173+ bool IsHi = AMDGPU::isHi16Reg (MCReg, TRI);
174+ bool IsVector = TRI.isVectorRegister (MCReg);
175+ assert (isUInt<8 >(RegIdx));
176+ return (IsVector ? 0x200 : 0 ) | (RegIdx << 1 ) | (IsHi ? 1 : 0 );
177+ }
178+
168179// Maps values of InstCounterType to the instruction that waits on that
169180// counter. Only used if GCNSubtarget::hasExtendedWaitCounts()
170181// returns true.
@@ -757,30 +768,31 @@ RegInterval WaitcntBrackets::getRegInterval(const MachineInstr *MI,
757768
758769 RegInterval Result;
759770
760- unsigned Reg = TRI->getEncodingValue (AMDGPU::getMCReg (Op.getReg (), *ST)) &
761- AMDGPU::HWEncoding::REG_IDX_MASK;
771+ unsigned Reg = getRegPoint (Op.getReg (), *TRI);
772+ const TargetRegisterClass *RC = TRI->getPhysRegBaseClass (Op.getReg ());
773+ unsigned Size = TRI->getRegSizeInBits (*RC);
762774
775+ // VGPRs are tracked every 16 bits, SGPRs by 32 bits
763776 if (TRI->isVectorRegister (*MRI, Op.getReg ())) {
764777 assert (Reg >= Encoding.VGPR0 && Reg <= Encoding.VGPRL );
765778 Result.first = Reg - Encoding.VGPR0 ;
766779 if (TRI->isAGPR (*MRI, Op.getReg ()))
767780 Result.first += AGPR_OFFSET;
768781 assert (Result.first >= 0 && Result.first < SQ_MAX_PGM_VGPRS);
782+ assert (Size % 16 == 0 );
783+ Result.second = Result.first + (Size / 16 );
769784 } else if (TRI->isSGPRReg (*MRI, Op.getReg ())) {
770- assert (Reg >= Encoding.SGPR0 && Reg < SQ_MAX_PGM_SGPRS);
771- Result.first = Reg - Encoding.SGPR0 + NUM_ALL_VGPRS;
785+ assert (Reg >= Encoding.SGPR0 && Reg < SQ_MAX_PGM_SGPRS * 2 );
786+ Result.first = (( Reg - Encoding.SGPR0 ) >> 1 ) + NUM_ALL_VGPRS;
772787 assert (Result.first >= NUM_ALL_VGPRS &&
773788 Result.first < SQ_MAX_PGM_SGPRS + NUM_ALL_VGPRS);
789+ Result.second = Result.first + divideCeil (Size, 32 );
774790 }
775791 // TODO: Handle TTMP
776792 // else if (TRI->isTTMP(*MRI, Reg.getReg())) ...
777793 else
778794 return {-1 , -1 };
779795
780- const TargetRegisterClass *RC = TRI->getPhysRegBaseClass (Op.getReg ());
781- unsigned Size = TRI->getRegSizeInBits (*RC);
782- Result.second = Result.first + ((Size + 16 ) / 32 );
783-
784796 return Result;
785797}
786798
@@ -2452,16 +2464,14 @@ bool SIInsertWaitcnts::runOnMachineFunction(MachineFunction &MF) {
24522464
24532465 unsigned NumVGPRsMax = ST->getAddressableNumVGPRs ();
24542466 unsigned NumSGPRsMax = ST->getAddressableNumSGPRs ();
2455- assert (NumVGPRsMax <= SQ_MAX_PGM_VGPRS);
2467+ assert (NumVGPRsMax + AGPR_OFFSET <= SQ_MAX_PGM_VGPRS);
24562468 assert (NumSGPRsMax <= SQ_MAX_PGM_SGPRS);
24572469
24582470 RegisterEncoding Encoding = {};
2459- Encoding.VGPR0 =
2460- TRI->getEncodingValue (AMDGPU::VGPR0) & AMDGPU::HWEncoding::REG_IDX_MASK;
2461- Encoding.VGPRL = Encoding.VGPR0 + NumVGPRsMax - 1 ;
2462- Encoding.SGPR0 =
2463- TRI->getEncodingValue (AMDGPU::SGPR0) & AMDGPU::HWEncoding::REG_IDX_MASK;
2464- Encoding.SGPRL = Encoding.SGPR0 + NumSGPRsMax - 1 ;
2471+ Encoding.VGPR0 = getRegPoint (AMDGPU::VGPR0, *TRI);
2472+ Encoding.VGPRL = Encoding.VGPR0 + NumVGPRsMax * 2 - 1 ;
2473+ Encoding.SGPR0 = getRegPoint (AMDGPU::SGPR0, *TRI);
2474+ Encoding.SGPRL = Encoding.SGPR0 + NumSGPRsMax * 2 - 1 ;
24652475
24662476 BlockInfos.clear ();
24672477 bool Modified = false ;
0 commit comments