// Returns true if MI has been queued on the deferred list, i.e. it was seen
// before but intentionally not processed yet by the VALU lowering worklist.
bool SIInstrWorklist::isDeferred(MachineInstr *MI) {
  return DeferredList.contains(MI);
}
72297229
7230- // legalize operand between 16bit and 32bit registers in v2s copy
7230+ // Legalize size mismatches between 16bit and 32bit registers in v2s copy
72317231// lowering (change spgr to vgpr).
72327232// This is mainly caused by 16bit SALU and 16bit VALU using reg with different
72337233// size. Need to legalize the size of the operands during the vgpr lowering
72347234// chain. This can be removed after we have sgpr16 in place
7235- void SIInstrInfo::legalizeOperandsVALUt16 (MachineInstr &MI,
7235+ void SIInstrInfo::legalizeOperandsVALUt16 (MachineInstr &MI, unsigned OpIdx,
72367236 MachineRegisterInfo &MRI) const {
72377237 if (!ST.useRealTrue16Insts ())
72387238 return ;
72397239
72407240 unsigned Opcode = MI.getOpcode ();
72417241 MachineBasicBlock *MBB = MI.getParent ();
7242+ // Legalize operands and check for size mismatch
7243+ if (!OpIdx || OpIdx >= MI.getNumExplicitOperands () ||
7244+ OpIdx >= get (Opcode).getNumOperands ())
7245+ return ;
72427246
7243- // legalize operands and check for size mismatch
7244- for (MachineOperand &Op : MI.explicit_operands ()) {
7245- unsigned OpIdx = Op.getOperandNo ();
7246- if (!OpIdx)
7247- continue ;
7248- if (Op.isReg () && Op.getReg ().isVirtual ()) {
7249- const TargetRegisterClass *DefRC = MRI.getRegClass (Op.getReg ());
7250- if (!RI.isVGPRClass (DefRC))
7251- continue ;
7252- unsigned RCID = get (Opcode).operands ()[OpIdx].RegClass ;
7253- const TargetRegisterClass *UseRC = RI.getRegClass (RCID);
7254- if (RI.getMatchingSuperRegClass (DefRC, UseRC, AMDGPU::lo16)) {
7255- Op.setSubReg (AMDGPU::lo16);
7256- } else if (RI.getMatchingSuperRegClass (UseRC, DefRC, AMDGPU::lo16)) {
7257- const DebugLoc &DL = MI.getDebugLoc ();
7258- Register NewDstReg =
7259- MRI.createVirtualRegister (&AMDGPU::VGPR_32RegClass);
7260- Register Undef = MRI.createVirtualRegister (&AMDGPU::VGPR_16RegClass);
7261- BuildMI (*MBB, MI, DL, get (AMDGPU::IMPLICIT_DEF), Undef);
7262- BuildMI (*MBB, MI, DL, get (AMDGPU::REG_SEQUENCE), NewDstReg)
7263- .addReg (Op.getReg ())
7264- .addImm (AMDGPU::lo16)
7265- .addReg (Undef)
7266- .addImm (AMDGPU::hi16);
7267- Op.setReg (NewDstReg);
7268- }
7269- }
7247+ MachineOperand &Op = MI.getOperand (OpIdx);
7248+ if (!Op.isReg () || !Op.getReg ().isVirtual ())
7249+ return ;
7250+
7251+ const TargetRegisterClass *CurrRC = MRI.getRegClass (Op.getReg ());
7252+ if (!RI.isVGPRClass (CurrRC))
7253+ return ;
7254+
7255+ unsigned RCID = get (Opcode).operands ()[OpIdx].RegClass ;
7256+ const TargetRegisterClass *ExpectedRC = RI.getRegClass (RCID);
7257+ if (RI.getMatchingSuperRegClass (CurrRC, ExpectedRC, AMDGPU::lo16)) {
7258+ Op.setSubReg (AMDGPU::lo16);
7259+ } else if (RI.getMatchingSuperRegClass (ExpectedRC, CurrRC, AMDGPU::lo16)) {
7260+ const DebugLoc &DL = MI.getDebugLoc ();
7261+ Register NewDstReg = MRI.createVirtualRegister (&AMDGPU::VGPR_32RegClass);
7262+ Register Undef = MRI.createVirtualRegister (&AMDGPU::VGPR_16RegClass);
7263+ BuildMI (*MBB, MI, DL, get (AMDGPU::IMPLICIT_DEF), Undef);
7264+ BuildMI (*MBB, MI, DL, get (AMDGPU::REG_SEQUENCE), NewDstReg)
7265+ .addReg (Op.getReg ())
7266+ .addImm (AMDGPU::lo16)
7267+ .addReg (Undef)
7268+ .addImm (AMDGPU::hi16);
7269+ Op.setReg (NewDstReg);
72707270 }
72717271}
7272+ void SIInstrInfo::legalizeOperandsVALUt16 (MachineInstr &MI,
7273+ MachineRegisterInfo &MRI) const {
7274+ for (unsigned OpIdx = 1 ; OpIdx < MI.getNumExplicitOperands (); OpIdx++)
7275+ legalizeOperandsVALUt16 (MI, OpIdx, MRI);
7276+ }
72727277
72737278void SIInstrInfo::moveToVALU (SIInstrWorklist &Worklist,
72747279 MachineDominatorTree *MDT) const {
@@ -7789,15 +7794,14 @@ void SIInstrInfo::moveToVALUImpl(SIInstrWorklist &Worklist,
77897794 return ;
77907795 }
77917796
7792- // If this is a v2s copy src from 16bit to 32bit,
7793- // replace vgpr copy to reg_sequence
7797+ // If this is a v2s copy between 16bit and 32bit reg ,
7798+ // replace vgpr copy to reg_sequence/extract_subreg
77947799 // This can be remove after we have sgpr16 in place
77957800 if (ST.useRealTrue16Insts () && Inst.isCopy () &&
77967801 Inst.getOperand (1 ).getReg ().isVirtual () &&
77977802 RI.isVGPR (MRI, Inst.getOperand (1 ).getReg ())) {
77987803 const TargetRegisterClass *SrcRegRC = getOpRegClass (Inst, 1 );
7799- if (16 == RI.getRegSizeInBits (*SrcRegRC) &&
7800- 32 == RI.getRegSizeInBits (*NewDstRC)) {
7804+ if (RI.getMatchingSuperRegClass (NewDstRC, SrcRegRC, AMDGPU::lo16)) {
78017805 Register NewDstReg = MRI.createVirtualRegister (NewDstRC);
78027806 Register Undef = MRI.createVirtualRegister (&AMDGPU::VGPR_16RegClass);
78037807 BuildMI (*Inst.getParent (), &Inst, Inst.getDebugLoc (),
@@ -7810,18 +7814,13 @@ void SIInstrInfo::moveToVALUImpl(SIInstrWorklist &Worklist,
78107814 .addImm (AMDGPU::hi16);
78117815 Inst.eraseFromParent ();
78127816 MRI.replaceRegWith (DstReg, NewDstReg);
7813- // legalize useMI with mismatched size
7814- for (MachineRegisterInfo::use_iterator I = MRI.use_begin (NewDstReg),
7815- E = MRI.use_end ();
7816- I != E; ++I) {
7817- MachineInstr &UseMI = *I->getParent ();
7818- unsigned UseMIOpcode = UseMI.getOpcode ();
7819- if (AMDGPU::isTrue16Inst (UseMIOpcode) &&
7820- (16 ==
7821- RI.getRegSizeInBits (*getOpRegClass (UseMI, I.getOperandNo ())))) {
7822- I->setSubReg (AMDGPU::lo16);
7823- }
7824- }
7817+ addUsersToMoveToVALUWorklist (NewDstReg, MRI, Worklist);
7818+ return ;
7819+ } else if (RI.getMatchingSuperRegClass (SrcRegRC, NewDstRC,
7820+ AMDGPU::lo16)) {
7821+ Inst.getOperand (1 ).setSubReg (AMDGPU::lo16);
7822+ Register NewDstReg = MRI.createVirtualRegister (NewDstRC);
7823+ MRI.replaceRegWith (DstReg, NewDstReg);
78257824 addUsersToMoveToVALUWorklist (NewDstReg, MRI, Worklist);
78267825 return ;
78277826 }
@@ -7916,23 +7915,6 @@ void SIInstrInfo::moveToVALUImpl(SIInstrWorklist &Worklist,
79167915 assert (NewDstRC);
79177916 NewDstReg = MRI.createVirtualRegister (NewDstRC);
79187917 MRI.replaceRegWith (DstReg, NewDstReg);
7919-
7920- // Check useMI of NewInstr. If used by a true16 instruction,
7921- // add a lo16 subreg access if size mismatched
7922- // This can be remove after we have sgpr16 in place
7923- if (ST.useRealTrue16Insts () && NewDstRC == &AMDGPU::VGPR_32RegClass) {
7924- for (MachineRegisterInfo::use_iterator I = MRI.use_begin (NewDstReg),
7925- E = MRI.use_end ();
7926- I != E; ++I) {
7927- MachineInstr &UseMI = *I->getParent ();
7928- unsigned UseMIOpcode = UseMI.getOpcode ();
7929- if (AMDGPU::isTrue16Inst (UseMIOpcode) &&
7930- (16 ==
7931- RI.getRegSizeInBits (*getOpRegClass (UseMI, I.getOperandNo ())))) {
7932- I->setSubReg (AMDGPU::lo16);
7933- }
7934- }
7935- }
79367918 }
79377919 fixImplicitOperands (*NewInstr);
79387920
@@ -8740,6 +8722,8 @@ void SIInstrInfo::addUsersToMoveToVALUWorklist(
87408722 ++I;
87418723 } while (I != E && I->getParent () == &UseMI);
87428724 } else {
8725+ legalizeOperandsVALUt16 (UseMI, OpNo, MRI);
8726+
87438727 ++I;
87448728 }
87458729 }
0 commit comments