@@ -7235,24 +7235,44 @@ bool SIInstrWorklist::isDeferred(MachineInstr *MI) {
72357235 return DeferredList.contains (MI);
72367236}
72377237
7238- // 16bit SALU use sgpr32. If a 16bit SALU get lowered to VALU in true16 mode,
7239- // sgpr32 is replaced to vgpr32 which is illegal in t16 inst. Need to add
7240- // subreg access properly. This can be removed after we have sgpr16 in place
7241- void SIInstrInfo::legalizeOperandsVALUt16 (MachineInstr &Inst,
7238+ // legalize operand between 16bit and 32bit registers in v2s copy
7239+ // lowering (change spgr to vgpr).
7240+ // This is mainly caused by 16bit SALU and 16bit VALU using reg with different
7241+ // size. Need to legalize the size of the operands during the vgpr lowering
7242+ // chain. This can be removed after we have sgpr16 in place
7243+ void SIInstrInfo::legalizeOperandsVALUt16 (MachineInstr &MI,
72427244 MachineRegisterInfo &MRI) const {
7243- unsigned Opcode = Inst.getOpcode ();
7244- if (!AMDGPU::isTrue16Inst (Opcode) || !ST.useRealTrue16Insts ())
7245+ if (!ST.useRealTrue16Insts ())
72457246 return ;
72467247
7247- for (MachineOperand &Op : Inst.explicit_operands ()) {
7248+ unsigned Opcode = MI.getOpcode ();
7249+ MachineBasicBlock *MBB = MI.getParent ();
7250+
7251+ // legalize operands and check for size mismatch
7252+ for (MachineOperand &Op : MI.explicit_operands ()) {
72487253 unsigned OpIdx = Op.getOperandNo ();
72497254 if (!OpIdx)
72507255 continue ;
7251- if (Op.isReg () && RI.isVGPR (MRI, Op.getReg ())) {
7256+ if (Op.isReg () && Op. getReg (). isVirtual () && RI.isVGPR (MRI, Op.getReg ())) {
72527257 unsigned RCID = get (Opcode).operands ()[OpIdx].RegClass ;
7253- const TargetRegisterClass *RC = RI.getRegClass (RCID);
7254- if (RI.getRegSizeInBits (*RC) == 16 ) {
7258+ const TargetRegisterClass *ExpectedRC = RI.getRegClass (RCID);
7259+ const TargetRegisterClass *RC = MRI.getRegClass (Op.getReg ());
7260+ if (32 == RI.getRegSizeInBits (*RC) &&
7261+ 16 == RI.getRegSizeInBits (*ExpectedRC)) {
72557262 Op.setSubReg (AMDGPU::lo16);
7263+ } else if (16 == RI.getRegSizeInBits (*RC) &&
7264+ 32 == RI.getRegSizeInBits (*ExpectedRC)) {
7265+ const DebugLoc &DL = MI.getDebugLoc ();
7266+ Register NewDstReg =
7267+ MRI.createVirtualRegister (&AMDGPU::VGPR_32RegClass);
7268+ Register Undef = MRI.createVirtualRegister (&AMDGPU::VGPR_16RegClass);
7269+ BuildMI (*MBB, MI, DL, get (AMDGPU::IMPLICIT_DEF), Undef);
7270+ BuildMI (*MBB, MI, DL, get (AMDGPU::REG_SEQUENCE), NewDstReg)
7271+ .addReg (Op.getReg ())
7272+ .addImm (AMDGPU::lo16)
7273+ .addReg (Undef)
7274+ .addImm (AMDGPU::hi16);
7275+ Op.setReg (NewDstReg);
72567276 }
72577277 }
72587278 }
@@ -7777,8 +7797,8 @@ void SIInstrInfo::moveToVALUImpl(SIInstrWorklist &Worklist,
77777797 return ;
77787798 }
77797799
7780- // If this is a v2s copy src from vgpr16 to sgpr32 ,
7781- // replace vgpr copy to subreg_to_reg
7800+ // If this is a copy from a 16bit source reg to a 32bit dest reg,
7801+ // replace the copy with a reg_sequence.
77827802 // This can be removed after we have sgpr16 in place
77837803 if (ST.useRealTrue16Insts () && Inst.isCopy () &&
77847804 Inst.getOperand (1 ).getReg ().isVirtual () &&
@@ -7787,14 +7807,29 @@ void SIInstrInfo::moveToVALUImpl(SIInstrWorklist &Worklist,
77877807 if (16 == RI.getRegSizeInBits (*SrcRegRC) &&
77887808 32 == RI.getRegSizeInBits (*NewDstRC)) {
77897809 Register NewDstReg = MRI.createVirtualRegister (NewDstRC);
7810+ Register Undef = MRI.createVirtualRegister (&AMDGPU::VGPR_16RegClass);
77907811 BuildMI (*Inst.getParent (), &Inst, Inst.getDebugLoc (),
7791- get (TargetOpcode::SUBREG_TO_REG), NewDstReg)
7792- .add (MachineOperand::CreateImm (0 ))
7793- .add (Inst.getOperand (1 ))
7794- .add (MachineOperand::CreateImm (AMDGPU::lo16));
7812+ get (AMDGPU::IMPLICIT_DEF), Undef);
7813+ BuildMI (*Inst.getParent (), &Inst, Inst.getDebugLoc (),
7814+ get (AMDGPU::REG_SEQUENCE), NewDstReg)
7815+ .addReg (Inst.getOperand (1 ).getReg ())
7816+ .addImm (AMDGPU::lo16)
7817+ .addReg (Undef)
7818+ .addImm (AMDGPU::hi16);
77957819 Inst.eraseFromParent ();
7796-
77977820 MRI.replaceRegWith (DstReg, NewDstReg);
7821+ // legalize useMI with mismatched size
7822+ for (MachineRegisterInfo::use_iterator I = MRI.use_begin (NewDstReg),
7823+ E = MRI.use_end ();
7824+ I != E; ++I) {
7825+ MachineInstr &UseMI = *I->getParent ();
7826+ unsigned UseMIOpcode = UseMI.getOpcode ();
7827+ if (AMDGPU::isTrue16Inst (UseMIOpcode) &&
7828+ (16 ==
7829+ RI.getRegSizeInBits (*getOpRegClass (UseMI, I.getOperandNo ())))) {
7830+ I->setSubReg (AMDGPU::lo16);
7831+ }
7832+ }
77987833 addUsersToMoveToVALUWorklist (NewDstReg, MRI, Worklist);
77997834 return ;
78007835 }
0 commit comments