@@ -9115,6 +9115,63 @@ void SIInstrInfo::movePackToVALU(SIInstrWorklist &Worklist,
91159115 MachineOperand &Src1 = Inst.getOperand (2 );
91169116 const DebugLoc &DL = Inst.getDebugLoc ();
91179117
9118+ if (ST.useRealTrue16Insts ()) {
9119+ Register SrcReg0 = Src0.getReg ();
9120+ Register SrcReg1 = Src1.getReg ();
9121+
9122+ if (!RI.isVGPR (MRI, SrcReg0)) {
9123+ SrcReg0 = MRI.createVirtualRegister (&AMDGPU::VGPR_32RegClass);
9124+ BuildMI (*MBB, Inst, DL, get (AMDGPU::V_MOV_B32_e32), SrcReg0).add (Src0);
9125+ }
9126+ if (!RI.isVGPR (MRI, SrcReg1)) {
9127+ SrcReg1 = MRI.createVirtualRegister (&AMDGPU::VGPR_32RegClass);
9128+ BuildMI (*MBB, Inst, DL, get (AMDGPU::V_MOV_B32_e32), SrcReg1).add (Src1);
9129+ }
9130+ bool isSrc0Reg16 = MRI.constrainRegClass (SrcReg0, &AMDGPU::VGPR_16RegClass);
9131+ bool isSrc1Reg16 = MRI.constrainRegClass (SrcReg1, &AMDGPU::VGPR_16RegClass);
9132+
9133+ auto NewMI = BuildMI (*MBB, Inst, DL, get (AMDGPU::REG_SEQUENCE), ResultReg);
9134+ switch (Inst.getOpcode ()) {
9135+ case AMDGPU::S_PACK_LL_B32_B16: {
9136+ NewMI
9137+ .addReg (SrcReg0, 0 ,
9138+ isSrc0Reg16 ? AMDGPU::NoSubRegister : AMDGPU::lo16)
9139+ .addImm (AMDGPU::lo16)
9140+ .addReg (SrcReg1, 0 ,
9141+ isSrc1Reg16 ? AMDGPU::NoSubRegister : AMDGPU::lo16)
9142+ .addImm (AMDGPU::hi16);
9143+ } break ;
9144+ case AMDGPU::S_PACK_LH_B32_B16: {
9145+ NewMI
9146+ .addReg (SrcReg0, 0 ,
9147+ isSrc0Reg16 ? AMDGPU::NoSubRegister : AMDGPU::lo16)
9148+ .addImm (AMDGPU::lo16)
9149+ .addReg (SrcReg1, 0 , AMDGPU::hi16)
9150+ .addImm (AMDGPU::hi16);
9151+ } break ;
9152+ case AMDGPU::S_PACK_HL_B32_B16: {
9153+ NewMI.addReg (SrcReg0, 0 , AMDGPU::hi16)
9154+ .addImm (AMDGPU::lo16)
9155+ .addReg (SrcReg1, 0 ,
9156+ isSrc1Reg16 ? AMDGPU::NoSubRegister : AMDGPU::lo16)
9157+ .addImm (AMDGPU::hi16);
9158+ } break ;
9159+ case AMDGPU::S_PACK_HH_B32_B16: {
9160+ NewMI.addReg (SrcReg0, 0 , AMDGPU::hi16)
9161+ .addImm (AMDGPU::lo16)
9162+ .addReg (SrcReg1, 0 , AMDGPU::hi16)
9163+ .addImm (AMDGPU::hi16);
9164+ } break ;
9165+ default :
9166+ llvm_unreachable (" unhandled s_pack_* instruction" );
9167+ }
9168+
9169+ MachineOperand &Dest = Inst.getOperand (0 );
9170+ MRI.replaceRegWith (Dest.getReg (), ResultReg);
9171+ addUsersToMoveToVALUWorklist (ResultReg, MRI, Worklist);
9172+ return ;
9173+ }
9174+
91189175 switch (Inst.getOpcode ()) {
91199176 case AMDGPU::S_PACK_LL_B32_B16: {
91209177 Register ImmReg = MRI.createVirtualRegister (&AMDGPU::VGPR_32RegClass);
0 commit comments