Skip to content

Commit 450737f

Browse files
committed
[TargetInstrInfo] Add target hook for InstrSchedModel latency. [NFCI]
These hooks already exist when using instruction itineraries for latency info; this patch adds them for the newer TargetSchedModel. This allows targets to dynamically set latency values in the DAG builder. It is useful in multi-pass schedulers, such as in the AMDGPU backend, where we may want to schedule a region multiple times with a different machine model or tweaked latencies for a specific instruction type.
1 parent 64dba81 commit 450737f

File tree

4 files changed

+103
-75
lines changed

4 files changed

+103
-75
lines changed

llvm/include/llvm/CodeGen/TargetInstrInfo.h

Lines changed: 12 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -1848,13 +1848,25 @@ class LLVM_ABI TargetInstrInfo : public MCInstrInfo {
18481848
const MachineInstr &DefMI, unsigned DefIdx,
18491849
const MachineInstr &UseMI, unsigned UseIdx) const;
18501850

1851+
/// Compute the latency of a register data dependence (DefIdx -> UseIdx)
1852+
/// using the TargetSchedModel.
1853+
virtual std::optional<unsigned>
1854+
getOperandLatency(const TargetSchedModel &SchedModel,
1855+
const MachineInstr *DefMI, unsigned DefIdx,
1856+
const MachineInstr *UseMI, unsigned UseIdx) const;
1857+
18511858
/// Compute the instruction latency of a given instruction.
18521859
/// If the instruction has higher cost when predicated, it's returned via
18531860
/// PredCost.
18541861
virtual unsigned getInstrLatency(const InstrItineraryData *ItinData,
18551862
const MachineInstr &MI,
18561863
unsigned *PredCost = nullptr) const;
18571864

1865+
/// Compute the instruction latency using the TargetSchedModel.
1866+
virtual std::optional<unsigned>
1867+
getInstrLatency(const TargetSchedModel &TargetSchedModel,
1868+
const MachineInstr &MI) const;
1869+
18581870
virtual unsigned getPredicationCost(const MachineInstr &MI) const;
18591871

18601872
virtual unsigned getInstrLatency(const InstrItineraryData *ItinData,

llvm/include/llvm/CodeGen/TargetSchedule.h

Lines changed: 1 addition & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -44,8 +44,6 @@ class TargetSchedModel {
4444
// Resource units per cycle. Latency normalization factor.
4545
unsigned ResourceLCM = 0;
4646

47-
unsigned computeInstrLatency(const MCSchedClassDesc &SCDesc) const;
48-
4947
// EnableSchedModel and EnableSchedItins are used to control whether or not to
5048
// use the Target's {SchedMachineModel, InstrItins} for hardware infor based
5149
// Scheduling decisions. If both are enabled, as is the default, preference
@@ -203,6 +201,7 @@ class TargetSchedModel {
203201
bool UseDefaultDefLatency = true) const;
204202
LLVM_ABI unsigned computeInstrLatency(const MCInst &Inst) const;
205203
LLVM_ABI unsigned computeInstrLatency(unsigned Opcode) const;
204+
LLVM_ABI unsigned computeInstrLatency(const MCSchedClassDesc &SCDesc) const;
206205

207206
/// Output dependency latency of a pair of defs of the same register.
208207
///

llvm/lib/CodeGen/TargetInstrInfo.cpp

Lines changed: 79 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -1830,6 +1830,85 @@ unsigned TargetInstrInfo::getInstrLatency(const InstrItineraryData *ItinData,
18301830
return ItinData->getStageLatency(MI.getDesc().getSchedClass());
18311831
}
18321832

1833+
std::optional<unsigned>
1834+
TargetInstrInfo::getInstrLatency(const TargetSchedModel &TargetSchedModel,
1835+
const MachineInstr &MI) const {
1836+
if (TargetSchedModel.hasInstrSchedModel()) {
1837+
const MCSchedClassDesc *SCDesc = TargetSchedModel.resolveSchedClass(&MI);
1838+
if (SCDesc->isValid())
1839+
return TargetSchedModel.computeInstrLatency(*SCDesc);
1840+
}
1841+
1842+
return std::nullopt;
1843+
}
1844+
1845+
std::optional<unsigned> TargetInstrInfo::getOperandLatency(const TargetSchedModel &SchedModel,
1846+
const MachineInstr *DefMI,
1847+
unsigned DefOperIdx,
1848+
const MachineInstr *UseMI,
1849+
unsigned UseOperIdx) const {
1850+
// Only handle the TargetSchedModel-based computation here. If no
1851+
// instruction scheduling model is available, defer to the caller.
1852+
if (!SchedModel.hasInstrSchedModel())
1853+
return std::nullopt;
1854+
1855+
const MCSchedClassDesc *SCDesc = SchedModel.resolveSchedClass(DefMI);
1856+
if (!SCDesc->isValid())
1857+
return std::nullopt;
1858+
1859+
// Compute DefIdx from operand index.
1860+
unsigned DefIdx = 0;
1861+
for (unsigned I = 0; I != DefOperIdx; ++I) {
1862+
const MachineOperand &MO = DefMI->getOperand(I);
1863+
if (MO.isReg() && MO.isDef())
1864+
++DefIdx;
1865+
}
1866+
if (DefIdx < SCDesc->NumWriteLatencyEntries) {
1867+
// Lookup the definition's write latency in SubtargetInfo.
1868+
const TargetSubtargetInfo *STI = SchedModel.getSubtargetInfo();
1869+
const MCWriteLatencyEntry *WLEntry = STI->getWriteLatencyEntry(SCDesc, DefIdx);
1870+
unsigned WriteID = WLEntry->WriteResourceID;
1871+
unsigned Latency = WLEntry->Cycles >= 0 ? static_cast<unsigned>(WLEntry->Cycles) : 1000u;
1872+
if (!UseMI)
1873+
return Latency;
1874+
1875+
// Lookup the use's latency adjustment in SubtargetInfo.
1876+
const MCSchedClassDesc *UseDesc = SchedModel.resolveSchedClass(UseMI);
1877+
if (UseDesc->NumReadAdvanceEntries == 0)
1878+
return Latency;
1879+
// Compute UseIdx from operand index.
1880+
unsigned UseIdx = 0;
1881+
for (unsigned I = 0; I != UseOperIdx; ++I) {
1882+
const MachineOperand &MO = UseMI->getOperand(I);
1883+
if (MO.isReg() && MO.readsReg() && !MO.isDef())
1884+
++UseIdx;
1885+
}
1886+
int Advance = STI->getReadAdvanceCycles(UseDesc, UseIdx, WriteID);
1887+
if (Advance > 0 && static_cast<unsigned>(Advance) > Latency) // unsigned wrap
1888+
return 0;
1889+
return Latency - Advance;
1890+
}
1891+
1892+
// If DefIdx does not exist in the model (e.g. implicit defs), then return
1893+
// unit latency (defaultDefLatency may be too conservative).
1894+
#ifndef NDEBUG
1895+
if (SCDesc->isValid() && !DefMI->getOperand(DefOperIdx).isImplicit() &&
1896+
!DefMI->getDesc().operands()[DefOperIdx].isOptionalDef() &&
1897+
SchedModel.getMCSchedModel()->isComplete()) {
1898+
errs() << "DefIdx " << DefIdx << " exceeds machine model writes for "
1899+
<< *DefMI
1900+
<< " (Try with MCSchedModel.CompleteModel set to false)";
1901+
llvm_unreachable("incomplete machine model");
1902+
}
1903+
#endif
1904+
1905+
// FIXME: Automatically giving all implicit defs defaultDefLatency is
1906+
// undesirable. We should only do it for defs that are known to the MC
1907+
// desc like flags. Truly implicit defs should get 1 cycle latency.
1908+
const MCSchedModel *MCSM = SchedModel.getMCSchedModel();
1909+
return DefMI->isTransient() ? 0 : defaultDefLatency(*MCSM, *DefMI);
1910+
}
1911+
18331912
bool TargetInstrInfo::hasLowDefLatency(const TargetSchedModel &SchedModel,
18341913
const MachineInstr &DefMI,
18351914
unsigned DefIdx) const {

llvm/lib/CodeGen/TargetSchedule.cpp

Lines changed: 11 additions & 73 deletions
Original file line numberDiff line numberDiff line change
@@ -134,37 +134,6 @@ resolveSchedClass(const MachineInstr *MI) const {
134134
return SCDesc;
135135
}
136136

137-
/// Find the def index of this operand. This index maps to the machine model and
138-
/// is independent of use operands. Def operands may be reordered with uses or
139-
/// merged with uses without affecting the def index (e.g. before/after
140-
/// regalloc). However, an instruction's def operands must never be reordered
141-
/// with respect to each other.
142-
static unsigned findDefIdx(const MachineInstr *MI, unsigned DefOperIdx) {
143-
unsigned DefIdx = 0;
144-
for (unsigned i = 0; i != DefOperIdx; ++i) {
145-
const MachineOperand &MO = MI->getOperand(i);
146-
if (MO.isReg() && MO.isDef())
147-
++DefIdx;
148-
}
149-
return DefIdx;
150-
}
151-
152-
/// Find the use index of this operand. This is independent of the instruction's
153-
/// def operands.
154-
///
155-
/// Note that uses are not determined by the operand's isUse property, which
156-
/// is simply the inverse of isDef. Here we consider any readsReg operand to be
157-
/// a "use". The machine model allows an operand to be both a Def and Use.
158-
static unsigned findUseIdx(const MachineInstr *MI, unsigned UseOperIdx) {
159-
unsigned UseIdx = 0;
160-
for (unsigned i = 0; i != UseOperIdx; ++i) {
161-
const MachineOperand &MO = MI->getOperand(i);
162-
if (MO.isReg() && MO.readsReg() && !MO.isDef())
163-
++UseIdx;
164-
}
165-
return UseIdx;
166-
}
167-
168137
// Top-level API for clients that know the operand indices. This doesn't need to
169138
// return std::optional<unsigned>, as it always returns a valid latency.
170139
unsigned TargetSchedModel::computeOperandLatency(
@@ -177,8 +146,8 @@ unsigned TargetSchedModel::computeOperandLatency(
177146
if (!hasInstrSchedModel() && !hasInstrItineraries())
178147
return DefaultDefLatency;
179148

149+
std::optional<unsigned> OperLatency;
180150
if (hasInstrItineraries()) {
181-
std::optional<unsigned> OperLatency;
182151
if (UseMI) {
183152
OperLatency = TII->getOperandLatency(&InstrItins, *DefMI, DefOperIdx,
184153
*UseMI, UseOperIdx);
@@ -195,41 +164,11 @@ unsigned TargetSchedModel::computeOperandLatency(
195164
}
196165

197166
// hasInstrSchedModel()
198-
const MCSchedClassDesc *SCDesc = resolveSchedClass(DefMI);
199-
unsigned DefIdx = findDefIdx(DefMI, DefOperIdx);
200-
if (DefIdx < SCDesc->NumWriteLatencyEntries) {
201-
// Lookup the definition's write latency in SubtargetInfo.
202-
const MCWriteLatencyEntry *WLEntry =
203-
STI->getWriteLatencyEntry(SCDesc, DefIdx);
204-
unsigned WriteID = WLEntry->WriteResourceID;
205-
unsigned Latency = capLatency(WLEntry->Cycles);
206-
if (!UseMI)
207-
return Latency;
208-
209-
// Lookup the use's latency adjustment in SubtargetInfo.
210-
const MCSchedClassDesc *UseDesc = resolveSchedClass(UseMI);
211-
if (UseDesc->NumReadAdvanceEntries == 0)
212-
return Latency;
213-
unsigned UseIdx = findUseIdx(UseMI, UseOperIdx);
214-
int Advance = STI->getReadAdvanceCycles(UseDesc, UseIdx, WriteID);
215-
if (Advance > 0 && (unsigned)Advance > Latency) // unsigned wrap
216-
return 0;
217-
return Latency - Advance;
218-
}
219-
// If DefIdx does not exist in the model (e.g. implicit defs), then return
220-
// unit latency (defaultDefLatency may be too conservative).
221-
#ifndef NDEBUG
222-
if (SCDesc->isValid() && !DefMI->getOperand(DefOperIdx).isImplicit() &&
223-
!DefMI->getDesc().operands()[DefOperIdx].isOptionalDef() &&
224-
SchedModel.isComplete()) {
225-
errs() << "DefIdx " << DefIdx << " exceeds machine model writes for "
226-
<< *DefMI << " (Try with MCSchedModel.CompleteModel set to false)";
227-
llvm_unreachable("incomplete machine model");
228-
}
229-
#endif
230-
// FIXME: Automatically giving all implicit defs defaultDefLatency is
231-
// undesirable. We should only do it for defs that are known to the MC
232-
// desc like flags. Truly implicit defs should get 1 cycle latency.
167+
OperLatency =
168+
TII->getOperandLatency(*this, DefMI, DefOperIdx, UseMI, UseOperIdx);
169+
if (OperLatency)
170+
return *OperLatency;
171+
233172
return DefMI->isTransient() ? 0 : DefaultDefLatency;
234173
}
235174

@@ -259,12 +198,11 @@ TargetSchedModel::computeInstrLatency(const MachineInstr *MI,
259198
(!hasInstrSchedModel() && !UseDefaultDefLatency))
260199
return TII->getInstrLatency(&InstrItins, *MI);
261200

262-
if (hasInstrSchedModel()) {
263-
const MCSchedClassDesc *SCDesc = resolveSchedClass(MI);
264-
if (SCDesc->isValid())
265-
return computeInstrLatency(*SCDesc);
266-
}
267-
return TII->defaultDefLatency(SchedModel, *MI);
201+
std::optional<unsigned> InstrLatency;
202+
// This is used by subtargets that define an InstrSchedModel.
203+
InstrLatency = TII->getInstrLatency(*this, *MI);
204+
205+
return InstrLatency ? *InstrLatency : TII->defaultDefLatency(SchedModel, *MI);
268206
}
269207

270208
unsigned TargetSchedModel::

0 commit comments

Comments
 (0)