@@ -7227,27 +7227,52 @@ bool SIInstrWorklist::isDeferred(MachineInstr *MI) {
72277227 return DeferredList.contains (MI);
72287228}
72297229
7230- // 16bit SALU use sgpr32. If a 16bit SALU get lowered to VALU in true16 mode,
7231- // sgpr32 is replaced to vgpr32 which is illegal in t16 inst. Need to add
7232- // subreg access properly. This can be removed after we have sgpr16 in place
7233- void SIInstrInfo::legalizeOperandsVALUt16 (MachineInstr &Inst,
7230+ // Legalize size mismatches between 16bit and 32bit registers in v2s copy
7231+ // lowering (change spgr to vgpr).
7232+ // This is mainly caused by 16bit SALU and 16bit VALU using reg with different
7233+ // size. Need to legalize the size of the operands during the vgpr lowering
7234+ // chain. This can be removed after we have sgpr16 in place
7235+ void SIInstrInfo::legalizeOperandsVALUt16 (MachineInstr &MI, unsigned OpIdx,
72347236 MachineRegisterInfo &MRI) const {
7235- unsigned Opcode = Inst.getOpcode ();
7236- if (!AMDGPU::isTrue16Inst (Opcode) || !ST.useRealTrue16Insts ())
7237+ if (!ST.useRealTrue16Insts ())
72377238 return ;
72387239
7239- for (MachineOperand &Op : Inst.explicit_operands ()) {
7240- unsigned OpIdx = Op.getOperandNo ();
7241- if (!OpIdx)
7242- continue ;
7243- if (Op.isReg () && RI.isVGPR (MRI, Op.getReg ())) {
7244- unsigned RCID = get (Opcode).operands ()[OpIdx].RegClass ;
7245- const TargetRegisterClass *RC = RI.getRegClass (RCID);
7246- if (RI.getRegSizeInBits (*RC) == 16 ) {
7247- Op.setSubReg (AMDGPU::lo16);
7248- }
7249- }
7250- }
7240+ unsigned Opcode = MI.getOpcode ();
7241+ MachineBasicBlock *MBB = MI.getParent ();
7242+ // Legalize operands and check for size mismatch
7243+ if (!OpIdx || OpIdx >= MI.getNumExplicitOperands () ||
7244+ OpIdx >= get (Opcode).getNumOperands ())
7245+ return ;
7246+
7247+ MachineOperand &Op = MI.getOperand (OpIdx);
7248+ if (!Op.isReg () || !Op.getReg ().isVirtual ())
7249+ return ;
7250+
7251+ const TargetRegisterClass *CurrRC = MRI.getRegClass (Op.getReg ());
7252+ if (!RI.isVGPRClass (CurrRC))
7253+ return ;
7254+
7255+ unsigned RCID = get (Opcode).operands ()[OpIdx].RegClass ;
7256+ const TargetRegisterClass *ExpectedRC = RI.getRegClass (RCID);
7257+ if (RI.getMatchingSuperRegClass (CurrRC, ExpectedRC, AMDGPU::lo16)) {
7258+ Op.setSubReg (AMDGPU::lo16);
7259+ } else if (RI.getMatchingSuperRegClass (ExpectedRC, CurrRC, AMDGPU::lo16)) {
7260+ const DebugLoc &DL = MI.getDebugLoc ();
7261+ Register NewDstReg = MRI.createVirtualRegister (&AMDGPU::VGPR_32RegClass);
7262+ Register Undef = MRI.createVirtualRegister (&AMDGPU::VGPR_16RegClass);
7263+ BuildMI (*MBB, MI, DL, get (AMDGPU::IMPLICIT_DEF), Undef);
7264+ BuildMI (*MBB, MI, DL, get (AMDGPU::REG_SEQUENCE), NewDstReg)
7265+ .addReg (Op.getReg ())
7266+ .addImm (AMDGPU::lo16)
7267+ .addReg (Undef)
7268+ .addImm (AMDGPU::hi16);
7269+ Op.setReg (NewDstReg);
7270+ }
7271+ }
7272+ void SIInstrInfo::legalizeOperandsVALUt16 (MachineInstr &MI,
7273+ MachineRegisterInfo &MRI) const {
7274+ for (unsigned OpIdx = 1 ; OpIdx < MI.getNumExplicitOperands (); OpIdx++)
7275+ legalizeOperandsVALUt16 (MI, OpIdx, MRI);
72517276}
72527277
72537278void SIInstrInfo::moveToVALU (SIInstrWorklist &Worklist,
@@ -7769,15 +7794,14 @@ void SIInstrInfo::moveToVALUImpl(SIInstrWorklist &Worklist,
77697794 return ;
77707795 }
77717796
7772- // If this is a v2s copy src from 16bit to 32bit,
7773- // replace vgpr copy to reg_sequence
7797+ // If this is a v2s copy between 16bit and 32bit reg ,
7798+ // replace vgpr copy to reg_sequence/extract_subreg
77747799 // This can be removed after we have sgpr16 in place
77757800 if (ST.useRealTrue16Insts () && Inst.isCopy () &&
77767801 Inst.getOperand (1 ).getReg ().isVirtual () &&
77777802 RI.isVGPR (MRI, Inst.getOperand (1 ).getReg ())) {
77787803 const TargetRegisterClass *SrcRegRC = getOpRegClass (Inst, 1 );
7779- if (16 == RI.getRegSizeInBits (*SrcRegRC) &&
7780- 32 == RI.getRegSizeInBits (*NewDstRC)) {
7804+ if (RI.getMatchingSuperRegClass (NewDstRC, SrcRegRC, AMDGPU::lo16)) {
77817805 Register NewDstReg = MRI.createVirtualRegister (NewDstRC);
77827806 Register Undef = MRI.createVirtualRegister (&AMDGPU::VGPR_16RegClass);
77837807 BuildMI (*Inst.getParent (), &Inst, Inst.getDebugLoc (),
@@ -7789,7 +7813,13 @@ void SIInstrInfo::moveToVALUImpl(SIInstrWorklist &Worklist,
77897813 .addReg (Undef)
77907814 .addImm (AMDGPU::hi16);
77917815 Inst.eraseFromParent ();
7792-
7816+ MRI.replaceRegWith (DstReg, NewDstReg);
7817+ addUsersToMoveToVALUWorklist (NewDstReg, MRI, Worklist);
7818+ return ;
7819+ } else if (RI.getMatchingSuperRegClass (SrcRegRC, NewDstRC,
7820+ AMDGPU::lo16)) {
7821+ Inst.getOperand (1 ).setSubReg (AMDGPU::lo16);
7822+ Register NewDstReg = MRI.createVirtualRegister (NewDstRC);
77937823 MRI.replaceRegWith (DstReg, NewDstReg);
77947824 addUsersToMoveToVALUWorklist (NewDstReg, MRI, Worklist);
77957825 return ;
@@ -7885,23 +7915,6 @@ void SIInstrInfo::moveToVALUImpl(SIInstrWorklist &Worklist,
78857915 assert (NewDstRC);
78867916 NewDstReg = MRI.createVirtualRegister (NewDstRC);
78877917 MRI.replaceRegWith (DstReg, NewDstReg);
7888-
7889- // Check useMI of NewInstr. If used by a true16 instruction,
7890- // add a lo16 subreg access if size mismatched
7891- // This can be remove after we have sgpr16 in place
7892- if (ST.useRealTrue16Insts () && NewDstRC == &AMDGPU::VGPR_32RegClass) {
7893- for (MachineRegisterInfo::use_iterator I = MRI.use_begin (NewDstReg),
7894- E = MRI.use_end ();
7895- I != E; ++I) {
7896- MachineInstr &UseMI = *I->getParent ();
7897- unsigned UseMIOpcode = UseMI.getOpcode ();
7898- if (AMDGPU::isTrue16Inst (UseMIOpcode) &&
7899- (16 ==
7900- RI.getRegSizeInBits (*getOpRegClass (UseMI, I.getOperandNo ())))) {
7901- I->setSubReg (AMDGPU::lo16);
7902- }
7903- }
7904- }
79057918 }
79067919 fixImplicitOperands (*NewInstr);
79077920
@@ -8709,6 +8722,8 @@ void SIInstrInfo::addUsersToMoveToVALUWorklist(
87098722 ++I;
87108723 } while (I != E && I->getParent () == &UseMI);
87118724 } else {
8725+ legalizeOperandsVALUt16 (UseMI, OpNo, MRI);
8726+
87128727 ++I;
87138728 }
87148729 }
0 commit comments