@@ -130,10 +130,10 @@ enum WaitEventType {
130130// We reserve a fixed number of VGPR slots in the scoring tables for
131131// special tokens like SCMEM_LDS (needed for buffer load to LDS).
132132enum RegisterMapping {
133- SQ_MAX_PGM_VGPRS = 512 , // Maximum programmable VGPRs across all targets.
134- AGPR_OFFSET = 256 , // Maximum programmable ArchVGPRs across all targets.
135- SQ_MAX_PGM_SGPRS = 128 , // Maximum programmable SGPRs across all targets.
136- NUM_EXTRA_VGPRS = 9 , // Reserved slots for DS.
133+ SQ_MAX_PGM_VGPRS = 1024 , // Maximum programmable VGPRs across all targets.
134+ AGPR_OFFSET = 512 , // Maximum programmable ArchVGPRs across all targets.
135+ SQ_MAX_PGM_SGPRS = 256 , // Maximum programmable SGPRs across all targets.
136+ NUM_EXTRA_VGPRS = 9 , // Reserved slots for DS.
137137 // Artificial register slots to track LDS writes into specific LDS locations
138138 // if a location is known. When slots are exhausted or location is
139139 // unknown use the first slot. The first slot is also always updated in
@@ -748,26 +748,34 @@ RegInterval WaitcntBrackets::getRegInterval(const MachineInstr *MI,
748748
749749 RegInterval Result;
750750
751- unsigned Reg = TRI->getEncodingValue (AMDGPU::getMCReg (Op.getReg (), *ST)) &
752- AMDGPU::HWEncoding::REG_IDX_MASK;
751+ MCRegister MCReg = AMDGPU::getMCReg (Op.getReg (), *ST);
752+ unsigned RegIdx = TRI->getHWRegIndex (MCReg);
753+ assert (isUInt<8 >(RegIdx));
754+ unsigned Reg = (RegIdx << 1 ) | (AMDGPU::isHi16Reg (MCReg, *TRI) ? 1 : 0 );
753755
756+ const TargetRegisterClass *RC = TRI->getPhysRegBaseClass (Op.getReg ());
757+ unsigned Size = TRI->getRegSizeInBits (*RC);
758+
759+ // AGPRs/VGPRs are tracked every 16 bits, SGPRs by 32 bits
754760 if (TRI->isVectorRegister (*MRI, Op.getReg ())) {
755761 assert (Reg <= SQ_MAX_PGM_VGPRS);
756762 Result.first = Reg;
757763 if (TRI->isAGPR (*MRI, Op.getReg ()))
758764 Result.first += AGPR_OFFSET;
759765 assert (Result.first >= 0 && Result.first < SQ_MAX_PGM_VGPRS);
760- } else if (TRI->isSGPRReg (*MRI, Op.getReg ()) && Reg < SQ_MAX_PGM_SGPRS) {
761- // SGPRs including VCC, TTMPs and EXEC but excluding read-only scalar
762- // sources like SRC_PRIVATE_BASE.
763- Result.first = Reg + NUM_ALL_VGPRS;
764- } else {
766+ assert (Size % 16 == 0 );
767+ Result.second = Result.first + (Size / 16 );
768+ } else if (TRI->isSGPRReg (*MRI, Op.getReg ())) {
769+ assert (Reg < SQ_MAX_PGM_SGPRS * 2 );
770+ Result.first = (Reg >> 1 ) + NUM_ALL_VGPRS;
771+ assert (Result.first >= NUM_ALL_VGPRS &&
772+ Result.first < SQ_MAX_PGM_SGPRS + NUM_ALL_VGPRS);
773+ Result.second = Result.first + divideCeil (Size, 32 );
774+ }
775+ // TODO: Handle TTMP
776+ // else if (TRI->isTTMP(*MRI, Op.getReg())) ...
777+ else
765778 return {-1 , -1 };
766- }
767-
768- const TargetRegisterClass *RC = TRI->getPhysRegBaseClass (Op.getReg ());
769- unsigned Size = TRI->getRegSizeInBits (*RC);
770- Result.second = Result.first + ((Size + 16 ) / 32 );
771779
772780 return Result;
773781}
0 commit comments