@@ -7235,24 +7235,44 @@ bool SIInstrWorklist::isDeferred(MachineInstr *MI) {
72357235 return DeferredList.contains (MI);
72367236}
72377237
7238- // 16bit SALU use sgpr32. If a 16bit SALU get lowered to VALU in true16 mode,
7239- // sgpr32 is replaced to vgpr32 which is illegal in t16 inst. Need to add
7240- // subreg access properly. This can be removed after we have sgpr16 in place
7241- void SIInstrInfo::legalizeOperandsVALUt16 (MachineInstr &Inst,
7238+ // Legalize operands between 16-bit and 32-bit registers in v2s copy
7239+ // lowering (changing sgpr to vgpr).
7240+ // This is mainly needed because 16-bit SALU and 16-bit VALU instructions
7241+ // use registers of different sizes, so operand sizes must be legalized
7242+ // during the vgpr lowering chain. This can be removed after we have sgpr16 in place.
7243+ void SIInstrInfo::legalizeOperandsVALUt16 (MachineInstr &MI,
72427244 MachineRegisterInfo &MRI) const {
7243- unsigned Opcode = Inst.getOpcode ();
7244- if (!AMDGPU::isTrue16Inst (Opcode) || !ST.useRealTrue16Insts ())
7245+ if (!ST.useRealTrue16Insts ())
72457246 return ;
72467247
7247- for (MachineOperand &Op : Inst.explicit_operands ()) {
7248+ unsigned Opcode = MI.getOpcode ();
7249+ MachineBasicBlock *MBB = MI.getParent ();
7250+
7251+ // Legalize operands: check each register operand for a size mismatch
7252+ for (MachineOperand &Op : MI.explicit_operands ()) {
72487253 unsigned OpIdx = Op.getOperandNo ();
72497254 if (!OpIdx)
72507255 continue ;
7251- if (Op.isReg () && RI.isVGPR (MRI, Op.getReg ())) {
7256+ if (Op.isReg () && Op. getReg (). isVirtual () && RI.isVGPR (MRI, Op.getReg ())) {
72527257 unsigned RCID = get (Opcode).operands ()[OpIdx].RegClass ;
7253- const TargetRegisterClass *RC = RI.getRegClass (RCID);
7254- if (RI.getRegSizeInBits (*RC) == 16 ) {
7258+ const TargetRegisterClass *ExpectedRC = RI.getRegClass (RCID);
7259+ const TargetRegisterClass *RC = MRI.getRegClass (Op.getReg ());
7260+ if (32 == RI.getRegSizeInBits (*RC) &&
7261+ 16 == RI.getRegSizeInBits (*ExpectedRC)) {
72557262 Op.setSubReg (AMDGPU::lo16);
7263+ } else if (16 == RI.getRegSizeInBits (*RC) &&
7264+ 32 == RI.getRegSizeInBits (*ExpectedRC)) {
7265+ const DebugLoc &DL = MI.getDebugLoc ();
7266+ Register NewDstReg =
7267+ MRI.createVirtualRegister (&AMDGPU::VGPR_32RegClass);
7268+ Register Undef = MRI.createVirtualRegister (&AMDGPU::VGPR_16RegClass);
7269+ BuildMI (*MBB, MI, DL, get (AMDGPU::IMPLICIT_DEF), Undef);
7270+ BuildMI (*MBB, MI, DL, get (AMDGPU::REG_SEQUENCE), NewDstReg)
7271+ .addReg (Op.getReg ())
7272+ .addImm (AMDGPU::lo16)
7273+ .addReg (Undef)
7274+ .addImm (AMDGPU::hi16);
7275+ Op.setReg (NewDstReg);
72567276 }
72577277 }
72587278 }
@@ -7793,8 +7813,19 @@ void SIInstrInfo::moveToVALUImpl(SIInstrWorklist &Worklist,
77937813 .add (Inst.getOperand (1 ))
77947814 .add (MachineOperand::CreateImm (AMDGPU::lo16));
77957815 Inst.eraseFromParent ();
7796-
77977816 MRI.replaceRegWith (DstReg, NewDstReg);
7817+ // Legalize users with mismatched size: true16 users that expect a
7817+ // 16-bit operand must read the lo16 subreg of the new 32-bit register.
7818+ for (MachineRegisterInfo::use_iterator I = MRI.use_begin (NewDstReg),
7819+ E = MRI.use_end ();
7820+ I != E; ++I) {
7821+ MachineInstr &UseMI = *I->getParent ();
7822+ unsigned UseMIOpcode = UseMI.getOpcode ();
7823+ if (AMDGPU::isTrue16Inst (UseMIOpcode) &&
7824+ (16 ==
7825+ RI.getRegSizeInBits (*getOpRegClass (UseMI, I.getOperandNo ())))) {
7826+ I->setSubReg (AMDGPU::lo16);
7827+ }
7828+ }
77987829 addUsersToMoveToVALUWorklist (NewDstReg, MRI, Worklist);
77997830 return ;
78007831 }
0 commit comments