@@ -7228,6 +7228,29 @@ bool SIInstrWorklist::isDeferred(MachineInstr *MI) {
72287228 return DeferredList.contains (MI);
72297229}
72307230
7231+ // 16-bit SALU instructions use sgpr32. If a 16-bit SALU instruction gets lowered to VALU in true16 mode,
7232+ // the sgpr32 is replaced with a vgpr32, which is illegal in a t16 inst. Need to add
7233+ // subreg access properly. This can be removed after we have sgpr16 in place.
7234+ void SIInstrInfo::legalizeOperandsVALUt16 (MachineInstr &Inst,
7235+ MachineRegisterInfo &MRI) const {
7236+ unsigned Opcode = Inst.getOpcode ();
7237+ if (!AMDGPU::isTrue16Inst (Opcode) || !ST.useRealTrue16Insts ()) // only true16 opcodes on a real-true16 subtarget are rewritten
7238+ return ;
7239+
7240+ for (MachineOperand &Op : Inst.explicit_operands ()) {
7241+ unsigned OpIdx = Op.getOperandNo ();
7242+ if (!OpIdx) // operand 0 is never rewritten (presumably the destination -- confirm)
7243+ continue ;
7244+ if (Op.isReg () && RI.isVGPR (MRI, Op.getReg ())) { // only VGPR register operands are candidates
7245+ unsigned RCID = get (Opcode).operands ()[OpIdx].RegClass ; // register class the instruction description lists for this operand index
7246+ const TargetRegisterClass *RC = RI.getRegClass (RCID);
7247+ if (RI.getRegSizeInBits (*RC) == 16 ) { // the described class is 16 bits wide: access the register through lo16
7248+ Op.setSubReg (AMDGPU::lo16);
7249+ }
7250+ }
7251+ }
7252+ }
7253+
72317254void SIInstrInfo::moveToVALU (SIInstrWorklist &Worklist,
72327255 MachineDominatorTree *MDT) const {
72337256
@@ -7613,6 +7636,7 @@ void SIInstrInfo::moveToVALUImpl(SIInstrWorklist &Worklist,
76137636 .add (Inst.getOperand (0 ))
76147637 .add (Inst.getOperand (1 ));
76157638 }
7639+ legalizeOperandsVALUt16 (*NewInstr, MRI);
76167640 legalizeOperands (*NewInstr, MDT);
76177641 int SCCIdx = Inst.findRegisterDefOperandIdx (AMDGPU::SCC, /* TRI=*/ nullptr );
76187642 MachineOperand SCCOp = Inst.getOperand (SCCIdx);
@@ -7682,6 +7706,7 @@ void SIInstrInfo::moveToVALUImpl(SIInstrWorklist &Worklist,
76827706 .addImm (0 ) // omod
76837707 .addImm (0 ); // opsel0
76847708 MRI.replaceRegWith (Inst.getOperand (0 ).getReg (), NewDst);
7709+ legalizeOperandsVALUt16 (*NewInstr, MRI);
76857710 legalizeOperands (*NewInstr, MDT);
76867711 addUsersToMoveToVALUWorklist (NewDst, MRI, Worklist);
76877712 Inst.eraseFromParent ();
@@ -7747,6 +7772,7 @@ void SIInstrInfo::moveToVALUImpl(SIInstrWorklist &Worklist,
77477772
77487773 // If this is a v2s copy src from vgpr16 to sgpr32,
77497774 // replace vgpr copy to subreg_to_reg
7775+ // This can be removed after we have sgpr16 in place
77507776 if (ST.useRealTrue16Insts () && Inst.isCopy () &&
77517777 Inst.getOperand (1 ).getReg ().isVirtual () &&
77527778 RI.isVGPR (MRI, Inst.getOperand (1 ).getReg ())) {
@@ -7785,11 +7811,7 @@ void SIInstrInfo::moveToVALUImpl(SIInstrWorklist &Worklist,
77857811 NewInstr.addImm (0 );
77867812 if (AMDGPU::hasNamedOperand (NewOpcode, AMDGPU::OpName::src0)) {
77877813 MachineOperand Src = Inst.getOperand (1 );
7788- if (AMDGPU::isTrue16Inst (NewOpcode) && ST.useRealTrue16Insts () &&
7789- Src.isReg () && RI.isVGPR (MRI, Src.getReg ()))
7790- NewInstr.addReg (Src.getReg (), 0 , AMDGPU::lo16);
7791- else
7792- NewInstr->addOperand (Src);
7814+ NewInstr->addOperand (Src);
77937815 }
77947816
77957817 if (Opcode == AMDGPU::S_SEXT_I32_I8 || Opcode == AMDGPU::S_SEXT_I32_I16) {
@@ -7863,6 +7885,7 @@ void SIInstrInfo::moveToVALUImpl(SIInstrWorklist &Worklist,
78637885
78647886 // Check useMI of NewInstr. If used by a true16 instruction,
78657887 // add a lo16 subreg access if size mismatched
7888+ // This can be removed after we have sgpr16 in place
78667889 if (ST.useRealTrue16Insts () && NewDstRC == &AMDGPU::VGPR_32RegClass) {
78677890 for (MachineRegisterInfo::use_iterator I = MRI.use_begin (NewDstReg),
78687891 E = MRI.use_end ();
@@ -7878,6 +7901,9 @@ void SIInstrInfo::moveToVALUImpl(SIInstrWorklist &Worklist,
78787901 }
78797902 }
78807903 fixImplicitOperands (*NewInstr);
7904+
7905+ legalizeOperandsVALUt16 (*NewInstr, MRI);
7906+
78817907 // Legalize the operands
78827908 legalizeOperands (*NewInstr, MDT);
78837909 if (NewDstReg)
0 commit comments