@@ -2239,6 +2239,8 @@ bool SIInstrInfo::expandPostRAPseudo(MachineInstr &MI) const {
22392239
22402240 case AMDGPU::SI_RESTORE_S32_FROM_VGPR:
22412241 MI.setDesc (get (AMDGPU::V_READLANE_B32));
2242+ MI.getMF ()->getRegInfo ().constrainRegClass (MI.getOperand (0 ).getReg (),
2243+ &AMDGPU::SReg_32_XM0RegClass);
22422244 break ;
22432245
22442246 case AMDGPU::V_MOV_B64_PSEUDO: {
@@ -6527,7 +6529,7 @@ emitLoadScalarOpsFromVGPRLoop(const SIInstrInfo &TII,
65276529 Register VScalarOp = ScalarOp->getReg ();
65286530
65296531 if (NumSubRegs == 1 ) {
6530- Register CurReg = MRI.createVirtualRegister (&AMDGPU::SGPR_32RegClass );
6532+ Register CurReg = MRI.createVirtualRegister (&AMDGPU::SReg_32_XM0RegClass );
65316533
65326534 BuildMI (LoopBB, I, DL, TII.get (AMDGPU::V_READFIRSTLANE_B32), CurReg)
65336535 .addReg (VScalarOp);
@@ -6559,8 +6561,10 @@ emitLoadScalarOpsFromVGPRLoop(const SIInstrInfo &TII,
65596561 " Unhandled register size" );
65606562
65616563 for (unsigned Idx = 0 ; Idx < NumSubRegs; Idx += 2 ) {
6562- Register CurRegLo = MRI.createVirtualRegister (&AMDGPU::SGPR_32RegClass);
6563- Register CurRegHi = MRI.createVirtualRegister (&AMDGPU::SGPR_32RegClass);
6564+ Register CurRegLo =
6565+ MRI.createVirtualRegister (&AMDGPU::SReg_32_XM0RegClass);
6566+ Register CurRegHi =
6567+ MRI.createVirtualRegister (&AMDGPU::SReg_32_XM0RegClass);
65646568
65656569 // Read the next variant <- also loop target.
65666570 BuildMI (LoopBB, I, DL, TII.get (AMDGPU::V_READFIRSTLANE_B32), CurRegLo)
@@ -7667,9 +7671,20 @@ void SIInstrInfo::moveToVALUImpl(SIInstrWorklist &Worklist,
76677671 if (Inst.isCopy () && DstReg.isPhysical () &&
76687672 RI.isVGPR (MRI, Inst.getOperand (1 ).getReg ())) {
76697673 // TODO: Only works for 32 bit registers.
7670- BuildMI (*Inst.getParent (), &Inst, Inst.getDebugLoc (),
7671- get (AMDGPU::V_READFIRSTLANE_B32), Inst.getOperand (0 ).getReg ())
7672- .add (Inst.getOperand (1 ));
7674+ if (MRI.constrainRegClass (DstReg, &AMDGPU::SReg_32_XM0RegClass)) {
7675+ BuildMI (*Inst.getParent (), &Inst, Inst.getDebugLoc (),
7676+ get (AMDGPU::V_READFIRSTLANE_B32), DstReg)
7677+ .add (Inst.getOperand (1 ));
7678+ } else {
7679+ Register NewDst =
7680+ MRI.createVirtualRegister (&AMDGPU::SReg_32_XM0RegClass);
7681+ BuildMI (*Inst.getParent (), &Inst, Inst.getDebugLoc (),
7682+ get (AMDGPU::V_READFIRSTLANE_B32), NewDst)
7683+ .add (Inst.getOperand (1 ));
7684+ BuildMI (*Inst.getParent (), &Inst, Inst.getDebugLoc (), get (AMDGPU::COPY),
7685+ DstReg)
7686+ .addReg (NewDst);
7687+ }
76737688 Inst.eraseFromParent ();
76747689 return ;
76757690 }
0 commit comments