Skip to content
45 changes: 24 additions & 21 deletions llvm/lib/Target/AMDGPU/SIPreEmitPeephole.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -615,11 +615,12 @@ void SIPreEmitPeephole::collectUnpackingCandidates(
for (auto I = std::next(BeginMI.getIterator()); I != E; ++I) {
MachineInstr &Instr = *I;
uint16_t UnpackedOpCode = mapToUnpackedOpcode(Instr);
Copy link
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

I don't know why it's written this way, but you're making a set of redundant conditions worse. The mapToUnpackedOpcode call should be sunk and supersedes all of the other checks here (e.g., why is ib bothering checking isTerminator?)

Not sure what any of this has to do with adding the new case

bool IsUnpackable =
!(UnpackedOpCode == std::numeric_limits<uint16_t>::max());
if (Instr.isMetaInstruction())
continue;
if ((Instr.isTerminator()) ||
(TII->isNeverCoissue(Instr) &&
(UnpackedOpCode == std::numeric_limits<uint16_t>::max())) ||
(TII->isNeverCoissue(Instr) && !IsUnpackable) ||
(SIInstrInfo::modifiesModeRegister(Instr) &&
Instr.modifiesRegister(AMDGPU::EXEC, TRI)))
return;
Expand All @@ -643,7 +644,7 @@ void SIPreEmitPeephole::collectUnpackingCandidates(
if (TRI->regsOverlap(MFMADef, InstrMO.getReg()))
return;
}
if (UnpackedOpCode == std::numeric_limits<uint16_t>::max())
if (!IsUnpackable)
continue;

if (canUnpackingClobberRegister(Instr))
Expand Down Expand Up @@ -822,24 +823,26 @@ bool SIPreEmitPeephole::run(MachineFunction &MF) {

// TODO: Fold this into previous block, if possible. Evaluate and handle any
// side effects.
if (ST.hasGFX950Insts() || ST.hasGFX940Insts()) {
for (MachineBasicBlock &MBB : MF) {
// Unpack packed instructions overlapped by MFMAs. This allows the
// compiler to co-issue unpacked instructions with MFMA
auto SchedModel = TII->getSchedModel();
SetVector<MachineInstr *> InstrsToUnpack;
for (auto &MI : make_early_inc_range(MBB.instrs())) {
if (!SIInstrInfo::isMFMA(MI))
continue;
const MCSchedClassDesc *SchedClassDesc =
SchedModel.resolveSchedClass(&MI);
uint16_t NumMFMACycles =
SchedModel.getWriteProcResBegin(SchedClassDesc)->ReleaseAtCycle;
collectUnpackingCandidates(MI, InstrsToUnpack, NumMFMACycles);
}
for (MachineInstr *MI : InstrsToUnpack) {
performF32Unpacking(*MI);
}

// Perform the extra MF scans only for supported archs
if (!ST.hasGFX940Insts())
Copy link
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

This is another unrelated simplification you can recommit

return Changed;
for (MachineBasicBlock &MBB : MF) {
// Unpack packed instructions overlapped by MFMAs. This allows the
// compiler to co-issue unpacked instructions with MFMA
auto SchedModel = TII->getSchedModel();
SetVector<MachineInstr *> InstrsToUnpack;
for (auto &MI : make_early_inc_range(MBB.instrs())) {
if (!SIInstrInfo::isMFMA(MI))
continue;
const MCSchedClassDesc *SchedClassDesc =
SchedModel.resolveSchedClass(&MI);
uint16_t NumMFMACycles =
SchedModel.getWriteProcResBegin(SchedClassDesc)->ReleaseAtCycle;
collectUnpackingCandidates(MI, InstrsToUnpack, NumMFMACycles);
}
for (MachineInstr *MI : InstrsToUnpack) {
performF32Unpacking(*MI);
}
}

Expand Down
Loading
You are viewing a condensed version of this merge commit. You can view the full changes here.