@@ -47,9 +47,6 @@ class SIPreEmitPeephole {
4747 const MachineBasicBlock &From,
4848 const MachineBasicBlock &To) const ;
4949 bool removeExeczBranch (MachineInstr &MI, MachineBasicBlock &SrcMBB);
50- // Check if the machine instruction being processed is a supported packed
51- // instruction.
52- bool isUnpackingSupportedInstr (MachineInstr &MI) const ;
5350 // Creates a list of packed instructions following an MFMA that are suitable
5451 // for unpacking.
5552 void collectUnpackingCandidates (MachineInstr &BeginMI,
@@ -454,23 +451,6 @@ bool SIPreEmitPeephole::removeExeczBranch(MachineInstr &MI,
454451 return true ;
455452}
456453
457- // If support is extended to new operations, add tests in
458- // llvm/test/CodeGen/AMDGPU/unpack-non-coissue-insts-post-ra-scheduler.mir.
459- bool SIPreEmitPeephole::isUnpackingSupportedInstr (MachineInstr &MI) const {
460- if (!TII->isNeverCoissue (MI))
461- return false ;
462- unsigned Opcode = MI.getOpcode ();
463- switch (Opcode) {
464- case AMDGPU::V_PK_ADD_F32:
465- case AMDGPU::V_PK_MUL_F32:
466- case AMDGPU::V_PK_FMA_F32:
467- return true ;
468- default :
469- return false ;
470- }
471- llvm_unreachable (" Fully covered switch" );
472- }
473-
474454bool SIPreEmitPeephole::canUnpackingClobberRegister (const MachineInstr &MI) {
475455 unsigned OpCode = MI.getOpcode ();
476456 Register DstReg = MI.getOperand (0 ).getReg ();
@@ -612,10 +592,13 @@ void SIPreEmitPeephole::collectUnpackingCandidates(
612592
613593 for (auto I = std::next (BeginMI.getIterator ()); I != E; ++I) {
614594 MachineInstr &Instr = *I;
595+ uint16_t UnpackedOpCode = mapToUnpackedOpcode (Instr);
596+ bool IsUnpackable =
597+ !(UnpackedOpCode == std::numeric_limits<uint16_t >::max ());
615598 if (Instr.isMetaInstruction ())
616599 continue ;
617600 if ((Instr.isTerminator ()) ||
618- (TII->isNeverCoissue (Instr) && !isUnpackingSupportedInstr (Instr) ) ||
601+ (TII->isNeverCoissue (Instr) && !IsUnpackable ) ||
619602 (SIInstrInfo::modifiesModeRegister (Instr) &&
620603 Instr.modifiesRegister (AMDGPU::EXEC, TRI)))
621604 return ;
@@ -639,7 +622,7 @@ void SIPreEmitPeephole::collectUnpackingCandidates(
639622 if (TRI->regsOverlap (MFMADef, InstrMO.getReg ()))
640623 return ;
641624 }
642- if (!isUnpackingSupportedInstr (Instr) )
625+ if (!IsUnpackable )
643626 continue ;
644627
645628 if (canUnpackingClobberRegister (Instr))
@@ -687,8 +670,8 @@ MachineInstrBuilder SIPreEmitPeephole::createUnpackedMI(MachineInstr &I,
687670 bool IsHiBits) {
688671 MachineBasicBlock &MBB = *I.getParent ();
689672 const DebugLoc &DL = I.getDebugLoc ();
690- const MachineOperand *SrcMO1 = TII->getNamedOperand (I, AMDGPU::OpName::src0);
691- const MachineOperand *SrcMO2 = TII->getNamedOperand (I, AMDGPU::OpName::src1);
673+ const MachineOperand *SrcMO0 = TII->getNamedOperand (I, AMDGPU::OpName::src0);
674+ const MachineOperand *SrcMO1 = TII->getNamedOperand (I, AMDGPU::OpName::src1);
692675 Register DstReg = I.getOperand (0 ).getReg ();
693676 unsigned OpCode = I.getOpcode ();
694677 Register UnpackedDstReg = IsHiBits ? TRI->getSubReg (DstReg, AMDGPU::sub1)
@@ -702,15 +685,15 @@ MachineInstrBuilder SIPreEmitPeephole::createUnpackedMI(MachineInstr &I,
702685
703686 MachineInstrBuilder NewMI = BuildMI (MBB, I, DL, TII->get (UnpackedOpcode));
704687 NewMI.addDef (UnpackedDstReg); // vdst
705- addOperandAndMods (NewMI, Src0Mods, IsHiBits, *SrcMO1 );
706- addOperandAndMods (NewMI, Src1Mods, IsHiBits, *SrcMO2 );
688+ addOperandAndMods (NewMI, Src0Mods, IsHiBits, *SrcMO0 );
689+ addOperandAndMods (NewMI, Src1Mods, IsHiBits, *SrcMO1 );
707690
708691 if (AMDGPU::hasNamedOperand (OpCode, AMDGPU::OpName::src2)) {
709- const MachineOperand *SrcMO3 =
692+ const MachineOperand *SrcMO2 =
710693 TII->getNamedOperand (I, AMDGPU::OpName::src2);
711694 unsigned Src2Mods =
712695 TII->getNamedOperand (I, AMDGPU::OpName::src2_modifiers)->getImm ();
713- addOperandAndMods (NewMI, Src2Mods, IsHiBits, *SrcMO3 );
696+ addOperandAndMods (NewMI, Src2Mods, IsHiBits, *SrcMO2 );
714697 }
715698 NewMI.addImm (ClampVal); // clamp
716699 // Packed instructions do not support output modifiers. Safe to assign them 0
@@ -787,9 +770,13 @@ bool SIPreEmitPeephole::run(MachineFunction &MF) {
787770
788771 // TODO: Fold this into previous block, if possible. Evaluate and handle any
789772 // side effects.
773+
774+ // Perform the extra MF scans only on subtargets with GFX940 instructions.
775+ if (!ST.hasGFX940Insts ())
776+ return Changed;
790777 for (MachineBasicBlock &MBB : MF) {
791- // Unpack packed instructions overlapped by MFMAs. This allows the compiler
792- // to co-issue unpacked instructions with MFMA
778+ // Unpack packed instructions overlapped by MFMAs. This allows the
779+ // compiler to co-issue the unpacked instructions with the MFMA.
793780 auto SchedModel = TII->getSchedModel ();
794781 SetVector<MachineInstr *> InstrsToUnpack;
795782 for (auto &MI : make_early_inc_range (MBB.instrs ())) {
0 commit comments