@@ -751,26 +751,25 @@ RegInterval WaitcntBrackets::getRegInterval(const MachineInstr *MI,
751751 MCRegister MCReg = AMDGPU::getMCReg (Op.getReg (), *ST);
752752 unsigned RegIdx = TRI->getHWRegIndex (MCReg);
753753 assert (isUInt<8 >(RegIdx));
754- unsigned Reg = (RegIdx << 1 ) | (AMDGPU::isHi16Reg (MCReg, *TRI) ? 1 : 0 );
755754
756755 const TargetRegisterClass *RC = TRI->getPhysRegBaseClass (Op.getReg ());
757756 unsigned Size = TRI->getRegSizeInBits (*RC);
758757
759758 // AGPRs/VGPRs are tracked every 16 bits, SGPRs by 32 bits
760759 if (TRI->isVectorRegister (*MRI, Op.getReg ())) {
761- assert (Reg <= SQ_MAX_PGM_VGPRS);
760+ unsigned Reg = (RegIdx << 1 ) | (AMDGPU::isHi16Reg (MCReg, *TRI) ? 1 : 0 );
761+ assert (Reg <= AGPR_OFFSET);
762762 Result.first = Reg;
763763 if (TRI->isAGPR (*MRI, Op.getReg ()))
764764 Result.first += AGPR_OFFSET;
765765 assert (Result.first >= 0 && Result.first < SQ_MAX_PGM_VGPRS);
766766 assert (Size % 16 == 0 );
767767 Result.second = Result.first + (Size / 16 );
768- } else if (TRI->isSGPRReg (*MRI, Op.getReg ()) &&
769- (Reg >> 1 ) < SQ_MAX_PGM_SGPRS) {
768+ } else if (TRI->isSGPRReg (*MRI, Op.getReg ()) && RegIdx < SQ_MAX_PGM_SGPRS) {
770769 // SGPRs including VCC, TTMPs and EXEC but excluding read-only scalar
771770 // sources like SRC_PRIVATE_BASE.
772- Result.first = (Reg >> 1 ) + NUM_ALL_VGPRS;
773- Result.second = Result.first + (( Size + 16 ) / 32 );
771+ Result.first = RegIdx + NUM_ALL_VGPRS;
772+ Result.second = Result.first + divideCeil ( Size, 32 );
774773 } else {
775774 return {-1 , -1 };
776775 }
0 commit comments