@@ -457,9 +457,9 @@ bool SIPreEmitPeephole::removeExeczBranch(MachineInstr &MI,
457457// If support is extended to new operations, add tests in
458458// llvm/test/CodeGen/AMDGPU/unpack-non-coissue-insts-post-ra-scheduler.mir.
459459bool SIPreEmitPeephole::isUnpackingSupportedInstr (MachineInstr &MI) const {
460- unsigned Opcode = MI.getOpcode ();
461460 if (!TII->isNeverCoissue (MI))
462461 return false ;
462+ unsigned Opcode = MI.getOpcode ();
463463 switch (Opcode) {
464464 case AMDGPU::V_PK_ADD_F32:
465465 case AMDGPU::V_PK_MUL_F32:
@@ -516,8 +516,7 @@ bool SIPreEmitPeephole::canUnpackingClobberRegister(
516516 const MachineOperand *Src2MO =
517517 TII->getNamedOperand (MI, AMDGPU::OpName::src2);
518518 if (Src2MO && Src2MO->isReg ()) {
519- Register SrcReg2 =
520- TII->getNamedOperand (MI, AMDGPU::OpName::src2)->getReg ();
519+ Register SrcReg2 = Src2MO->getReg ();
521520 unsigned Src2Mods =
522521 TII->getNamedOperand (MI, AMDGPU::OpName::src2_modifiers)->getImm ();
523522 Register HiSrc2Reg = (Src2Mods & SISrcMods::OP_SEL_1)
@@ -628,7 +627,7 @@ void SIPreEmitPeephole::collectUnpackingCandidates(
628627 SchedModel.getWriteProcResBegin (InstrSchedClassDesc)->ReleaseAtCycle ;
629628 TotalCyclesBetweenCandidates += Latency;
630629
631- if (TotalCyclesBetweenCandidates > NumMFMACycles - 1 )
630+ if (TotalCyclesBetweenCandidates >= NumMFMACycles - 1 )
632631 return ;
633632 // Identify register dependencies between those used by the MFMA
634633 // instruction and the following packed instructions. Also checks for
@@ -663,8 +662,7 @@ void SIPreEmitPeephole::performF32Unpacking(MachineInstr &I) {
663662 MachineOperand DstOp = I.getOperand (0 );
664663
665664 uint16_t UnpackedOpcode = mapToUnpackedOpcode (I);
666- if (UnpackedOpcode == std::numeric_limits<uint16_t >::max ())
667- return ;
665+ assert (UnpackedOpcode != std::numeric_limits<uint16_t >::max () && " Unsupported Opcode" );
668666
669667 MachineInstrBuilder Op0LOp1L =
670668 createUnpackedMI (I, UnpackedOpcode, /* IsHiBits=*/ false );
0 commit comments