@@ -2230,6 +2230,8 @@ bool SIInstrInfo::expandPostRAPseudo(MachineInstr &MI) const {
22302230
22312231 case AMDGPU::SI_RESTORE_S32_FROM_VGPR:
22322232 MI.setDesc (get (AMDGPU::V_READLANE_B32));
2233+ MI.getMF ()->getRegInfo ().constrainRegClass (MI.getOperand (0 ).getReg (),
2234+ &AMDGPU::SReg_32_XM0RegClass);
22332235 break ;
22342236
22352237 case AMDGPU::V_MOV_B64_PSEUDO: {
@@ -6352,7 +6354,7 @@ static void emitLoadScalarOpsFromVGPRLoop(
63526354 Register VScalarOp = ScalarOp->getReg ();
63536355
63546356 if (NumSubRegs == 1 ) {
6355- Register CurReg = MRI.createVirtualRegister (&AMDGPU::SGPR_32RegClass );
6357+ Register CurReg = MRI.createVirtualRegister (&AMDGPU::SReg_32_XM0RegClass );
63566358
63576359 BuildMI (LoopBB, I, DL, TII.get (AMDGPU::V_READFIRSTLANE_B32), CurReg)
63586360 .addReg (VScalarOp);
@@ -6383,8 +6385,10 @@ static void emitLoadScalarOpsFromVGPRLoop(
63836385 " Unhandled register size" );
63846386
63856387 for (unsigned Idx = 0 ; Idx < NumSubRegs; Idx += 2 ) {
6386- Register CurRegLo = MRI.createVirtualRegister (&AMDGPU::SGPR_32RegClass);
6387- Register CurRegHi = MRI.createVirtualRegister (&AMDGPU::SGPR_32RegClass);
6388+ Register CurRegLo =
6389+ MRI.createVirtualRegister (&AMDGPU::SReg_32_XM0RegClass);
6390+ Register CurRegHi =
6391+ MRI.createVirtualRegister (&AMDGPU::SReg_32_XM0RegClass);
63886392
63896393 // Read the next variant <- also loop target.
63906394 BuildMI (LoopBB, I, DL, TII.get (AMDGPU::V_READFIRSTLANE_B32), CurRegLo)
@@ -7440,9 +7444,20 @@ void SIInstrInfo::moveToVALUImpl(SIInstrWorklist &Worklist,
74407444 if (Inst.isCopy () && DstReg.isPhysical () &&
74417445 RI.isVGPR (MRI, Inst.getOperand (1 ).getReg ())) {
74427446 // TODO: Only works for 32 bit registers.
7443- BuildMI (*Inst.getParent (), &Inst, Inst.getDebugLoc (),
7444- get (AMDGPU::V_READFIRSTLANE_B32), Inst.getOperand (0 ).getReg ())
7445- .add (Inst.getOperand (1 ));
7447+ if (MRI.constrainRegClass (DstReg, &AMDGPU::SReg_32_XM0RegClass)) {
7448+ BuildMI (*Inst.getParent (), &Inst, Inst.getDebugLoc (),
7449+ get (AMDGPU::V_READFIRSTLANE_B32), DstReg)
7450+ .add (Inst.getOperand (1 ));
7451+ } else {
7452+ Register NewDst =
7453+ MRI.createVirtualRegister (&AMDGPU::SReg_32_XM0RegClass);
7454+ BuildMI (*Inst.getParent (), &Inst, Inst.getDebugLoc (),
7455+ get (AMDGPU::V_READFIRSTLANE_B32), NewDst)
7456+ .add (Inst.getOperand (1 ));
7457+ BuildMI (*Inst.getParent (), &Inst, Inst.getDebugLoc (), get (AMDGPU::COPY),
7458+ DstReg)
7459+ .addReg (NewDst);
7460+ }
74467461 Inst.eraseFromParent ();
74477462 return ;
74487463 }
0 commit comments