@@ -130,10 +130,10 @@ enum WaitEventType {
130130// We reserve a fixed number of VGPR slots in the scoring tables for
131131// special tokens like SCMEM_LDS (needed for buffer load to LDS).
132132enum RegisterMapping {
133- SQ_MAX_PGM_VGPRS = 512 , // Maximum programmable VGPRs across all targets.
134- AGPR_OFFSET = 256 , // Maximum programmable ArchVGPRs across all targets.
135- SQ_MAX_PGM_SGPRS = 128 , // Maximum programmable SGPRs across all targets.
136- NUM_EXTRA_VGPRS = 9 , // Reserved slots for DS.
133+ SQ_MAX_PGM_VGPRS = 1024 , // Number of VGPR/AGPR tracking slots (one per 16-bit half) across all targets.
134+ AGPR_OFFSET = 512 , // Slot index of the first AGPR; ArchVGPR 16-bit halves use the slots below it.
135+ SQ_MAX_PGM_SGPRS = 128 , // Maximum programmable SGPRs across all targets.
136+ NUM_EXTRA_VGPRS = 9 , // Reserved slots for DS.
137137 // Artificial register slots to track LDS writes into specific LDS locations
138138 // if a location is known. When slots are exhausted or location is
139139 // unknown use the first slot. The first slot is also always updated in
@@ -748,27 +748,32 @@ RegInterval WaitcntBrackets::getRegInterval(const MachineInstr *MI,
748748
749749 RegInterval Result;
750750
751- unsigned Reg = TRI->getEncodingValue (AMDGPU::getMCReg (Op.getReg (), *ST)) &
752- AMDGPU::HWEncoding::REG_IDX_MASK;
751+ MCRegister MCReg = AMDGPU::getMCReg (Op.getReg (), *ST);
752+ unsigned RegIdx = TRI->getHWRegIndex (MCReg);
753+ assert (isUInt<8 >(RegIdx));
753754
755+ const TargetRegisterClass *RC = TRI->getPhysRegBaseClass (Op.getReg ());
756+ unsigned Size = TRI->getRegSizeInBits (*RC);
757+
758+ // AGPRs/VGPRs are tracked at 16-bit granularity, SGPRs at 32-bit granularity.
754759 if (TRI->isVectorRegister (*MRI, Op.getReg ())) {
755- assert (Reg <= SQ_MAX_PGM_VGPRS);
760+ unsigned Reg = RegIdx << 1 | (AMDGPU::isHi16Reg (MCReg, *TRI) ? 1 : 0 );
761+ assert (Reg < AGPR_OFFSET);
756762 Result.first = Reg;
757763 if (TRI->isAGPR (*MRI, Op.getReg ()))
758764 Result.first += AGPR_OFFSET;
759765 assert (Result.first >= 0 && Result.first < SQ_MAX_PGM_VGPRS);
760- } else if (TRI->isSGPRReg (*MRI, Op.getReg ()) && Reg < SQ_MAX_PGM_SGPRS) {
766+ assert (Size % 16 == 0 );
767+ Result.second = Result.first + (Size / 16 );
768+ } else if (TRI->isSGPRReg (*MRI, Op.getReg ()) && RegIdx < SQ_MAX_PGM_SGPRS) {
761769 // SGPRs including VCC, TTMPs and EXEC but excluding read-only scalar
762770 // sources like SRC_PRIVATE_BASE.
763- Result.first = Reg + NUM_ALL_VGPRS;
771+ Result.first = RegIdx + NUM_ALL_VGPRS;
772+ Result.second = Result.first + divideCeil (Size, 32 );
764773 } else {
765774 return {-1 , -1 };
766775 }
767776
768- const TargetRegisterClass *RC = TRI->getPhysRegBaseClass (Op.getReg ());
769- unsigned Size = TRI->getRegSizeInBits (*RC);
770- Result.second = Result.first + ((Size + 16 ) / 32 );
771-
772777 return Result;
773778}
774779
0 commit comments