@@ -9072,6 +9072,67 @@ void SIInstrInfo::movePackToVALU(SIInstrWorklist &Worklist,
90729072 MachineOperand &Src1 = Inst.getOperand (2 );
90739073 const DebugLoc &DL = Inst.getDebugLoc ();
90749074
9075+ if (ST.useRealTrue16Insts ()) {
9076+ Register SrcReg0, SrcReg1;
9077+ if (!Src0.isReg () || !RI.isVGPR (MRI, Src0.getReg ())) {
9078+ SrcReg0 = MRI.createVirtualRegister (&AMDGPU::VGPR_32RegClass);
9079+ BuildMI (*MBB, Inst, DL, get (AMDGPU::V_MOV_B32_e32), SrcReg0).add (Src0);
9080+ } else {
9081+ SrcReg0 = Src0.getReg ();
9082+ }
9083+
9084+ if (!Src1.isReg () || !RI.isVGPR (MRI, Src1.getReg ())) {
9085+ SrcReg1 = MRI.createVirtualRegister (&AMDGPU::VGPR_32RegClass);
9086+ BuildMI (*MBB, Inst, DL, get (AMDGPU::V_MOV_B32_e32), SrcReg1).add (Src1);
9087+ } else {
9088+ SrcReg1 = Src1.getReg ();
9089+ }
9090+
9091+ bool isSrc0Reg16 = MRI.constrainRegClass (SrcReg0, &AMDGPU::VGPR_16RegClass);
9092+ bool isSrc1Reg16 = MRI.constrainRegClass (SrcReg1, &AMDGPU::VGPR_16RegClass);
9093+
9094+ auto NewMI = BuildMI (*MBB, Inst, DL, get (AMDGPU::REG_SEQUENCE), ResultReg);
9095+ switch (Inst.getOpcode ()) {
9096+ case AMDGPU::S_PACK_LL_B32_B16:
9097+ NewMI
9098+ .addReg (SrcReg0, 0 ,
9099+ isSrc0Reg16 ? AMDGPU::NoSubRegister : AMDGPU::lo16)
9100+ .addImm (AMDGPU::lo16)
9101+ .addReg (SrcReg1, 0 ,
9102+ isSrc1Reg16 ? AMDGPU::NoSubRegister : AMDGPU::lo16)
9103+ .addImm (AMDGPU::hi16);
9104+ break ;
9105+ case AMDGPU::S_PACK_LH_B32_B16:
9106+ NewMI
9107+ .addReg (SrcReg0, 0 ,
9108+ isSrc0Reg16 ? AMDGPU::NoSubRegister : AMDGPU::lo16)
9109+ .addImm (AMDGPU::lo16)
9110+ .addReg (SrcReg1, 0 , AMDGPU::hi16)
9111+ .addImm (AMDGPU::hi16);
9112+ break ;
9113+ case AMDGPU::S_PACK_HL_B32_B16:
9114+ NewMI.addReg (SrcReg0, 0 , AMDGPU::hi16)
9115+ .addImm (AMDGPU::lo16)
9116+ .addReg (SrcReg1, 0 ,
9117+ isSrc1Reg16 ? AMDGPU::NoSubRegister : AMDGPU::lo16)
9118+ .addImm (AMDGPU::hi16);
9119+ break ;
9120+ case AMDGPU::S_PACK_HH_B32_B16:
9121+ NewMI.addReg (SrcReg0, 0 , AMDGPU::hi16)
9122+ .addImm (AMDGPU::lo16)
9123+ .addReg (SrcReg1, 0 , AMDGPU::hi16)
9124+ .addImm (AMDGPU::hi16);
9125+ break ;
9126+ default :
9127+ llvm_unreachable (" unhandled s_pack_* instruction" );
9128+ }
9129+
9130+ MachineOperand &Dest = Inst.getOperand (0 );
9131+ MRI.replaceRegWith (Dest.getReg (), ResultReg);
9132+ addUsersToMoveToVALUWorklist (ResultReg, MRI, Worklist);
9133+ return ;
9134+ }
9135+
90759136 switch (Inst.getOpcode ()) {
90769137 case AMDGPU::S_PACK_LL_B32_B16: {
90779138 Register ImmReg = MRI.createVirtualRegister (&AMDGPU::VGPR_32RegClass);
0 commit comments