Skip to content
Closed
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
101 changes: 79 additions & 22 deletions llvm/lib/Target/AMDGPU/GCNHazardRecognizer.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -495,51 +495,110 @@ hasHazard(StateT State,
return false;
}

// Returns a minimum wait states since \p I walking all predecessors.
// Only scans until \p IsExpired does not return true.
// Can only be run in a hazard recognizer mode.
static int getWaitStatesSince(
// Update \p WaitStates while iterating from \p I to hazard in \p MBB.
static HazardFnResult countWaitStatesSince(
GCNHazardRecognizer::IsHazardFn IsHazard, const MachineBasicBlock *MBB,
MachineBasicBlock::const_reverse_instr_iterator I, int WaitStates,
IsExpiredFn IsExpired, DenseSet<const MachineBasicBlock *> &Visited,
GetNumWaitStatesFn GetNumWaitStates = SIInstrInfo::getNumWaitStates) {
MachineBasicBlock::const_reverse_instr_iterator I, int &WaitStates,
IsExpiredFn IsExpired, GetNumWaitStatesFn GetNumWaitStates) {
for (auto E = MBB->instr_rend(); I != E; ++I) {
// Don't add WaitStates for parent BUNDLE instructions.
if (I->isBundle())
continue;

if (IsHazard(*I))
return WaitStates;
return HazardFound;

if (I->isInlineAsm())
continue;

WaitStates += GetNumWaitStates(*I);

if (IsExpired(*I, WaitStates))
return std::numeric_limits<int>::max();
return HazardExpired;
}

return NoHazardFound;
}

// Implements predecessor search for getWaitStatesSince.
static int getWaitStatesSinceImpl(
Copy link
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Any particular reason for this to be split out into a separate function, now that it is not recursive?

Copy link
Contributor Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

  • Allow renaming of some of the variables to avoid programming errors (on my part).
  • Hint the compiler that this is the "out of line" case.

GCNHazardRecognizer::IsHazardFn IsHazard,
const MachineBasicBlock *InitialMBB, int InitialWaitStates,
IsExpiredFn IsExpired,
GetNumWaitStatesFn GetNumWaitStates = SIInstrInfo::getNumWaitStates) {
DenseMap<const MachineBasicBlock *, int> Visited;

// Build worklist of predecessors.
// Note: use queue so search is breadth first, which reduces search space
// when a hazard is found.
SmallVector<const MachineBasicBlock *> Worklist;
for (MachineBasicBlock *Pred : InitialMBB->predecessors()) {
Visited[Pred] = InitialWaitStates;
Worklist.push_back(Pred);
}

// Find minimum wait states to hazard or determine that all paths expire.
int MinWaitStates = std::numeric_limits<int>::max();
for (MachineBasicBlock *Pred : MBB->predecessors()) {
if (!Visited.insert(Pred).second)
continue;
unsigned Idx = 0;
while (Idx < Worklist.size()) {
const MachineBasicBlock *MBB = Worklist[Idx++];
int WaitStates = Visited[MBB];

// Make sure that worklist capacity is reused in large CFGs.
if (Idx >= 1024) {
Worklist.erase(Worklist.begin(), Worklist.begin() + (Idx - 1));
Idx = 0;
}

int W = getWaitStatesSince(IsHazard, Pred, Pred->instr_rbegin(), WaitStates,
IsExpired, Visited, GetNumWaitStates);
// No reason to search blocks when wait states exceed established minimum.
if (WaitStates >= MinWaitStates)
continue;

MinWaitStates = std::min(MinWaitStates, W);
// Search for hazard
auto Search = countWaitStatesSince(IsHazard, MBB, MBB->instr_rbegin(),
WaitStates, IsExpired, GetNumWaitStates);
if (Search == HazardFound) {
// Update minimum.
MinWaitStates = std::min(MinWaitStates, WaitStates);
} else if (Search == NoHazardFound && WaitStates < MinWaitStates) {
// Search predecessors.
for (MachineBasicBlock *Pred : MBB->predecessors()) {
if (!Visited.contains(Pred) || WaitStates < Visited[Pred]) {
// Store lowest wait states required to visit this block.
Visited[Pred] = WaitStates;
Worklist.push_back(Pred);
}
}
}
}

return MinWaitStates;
}

// Returns minimum wait states since \p I walking all predecessors.
// Only scans until \p IsExpired does not return true.
// Can only be run in a hazard recognizer mode.
static int getWaitStatesSince(
GCNHazardRecognizer::IsHazardFn IsHazard, const MachineBasicBlock *MBB,
MachineBasicBlock::const_reverse_instr_iterator I, int WaitStates,
IsExpiredFn IsExpired,
GetNumWaitStatesFn GetNumWaitStates = SIInstrInfo::getNumWaitStates) {
// Scan this block from I.
auto InitSearch = countWaitStatesSince(IsHazard, MBB, I, WaitStates,
Copy link
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Rather than call countWaitStatesSince here, can't you immediately call into getWaitStatesSinceImpl but initialize the worklist to just MBB? I.e. common up this function with the body of the loop in getWaitStatesSinceImpl? I guess the complexity is that each item on the worklist would have to be an (MBB, iterator) pair, so that you know at what point within each MBB to start searching.

Copy link
Contributor Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

I used this approach in an early draft, but it complicates the logic and adds memory usage for no real benefit.

IsExpired, GetNumWaitStates);
if (InitSearch == HazardFound)
return WaitStates;
if (InitSearch == HazardExpired)
return std::numeric_limits<int>::max();

return getWaitStatesSinceImpl(IsHazard, MBB, WaitStates, IsExpired,
GetNumWaitStates);
}

static int getWaitStatesSince(GCNHazardRecognizer::IsHazardFn IsHazard,
const MachineInstr *MI, IsExpiredFn IsExpired) {
DenseSet<const MachineBasicBlock *> Visited;
return getWaitStatesSince(IsHazard, MI->getParent(),
std::next(MI->getReverseIterator()),
0, IsExpired, Visited);
std::next(MI->getReverseIterator()), 0, IsExpired);
}

int GCNHazardRecognizer::getWaitStatesSince(IsHazardFn IsHazard, int Limit) {
Expand Down Expand Up @@ -1524,10 +1583,9 @@ bool GCNHazardRecognizer::fixLdsDirectVALUHazard(MachineInstr *MI) {
return SIInstrInfo::isVALU(MI) ? 1 : 0;
};

DenseSet<const MachineBasicBlock *> Visited;
auto Count = ::getWaitStatesSince(IsHazardFn, MI->getParent(),
std::next(MI->getReverseIterator()), 0,
IsExpiredFn, Visited, GetWaitStatesFn);
IsExpiredFn, GetWaitStatesFn);

// Transcendentals can execute in parallel to other VALUs.
// This makes va_vdst count unusable with a mixture of VALU and TRANS.
Expand Down Expand Up @@ -3234,10 +3292,9 @@ bool GCNHazardRecognizer::fixVALUReadSGPRHazard(MachineInstr *MI) {
};

// Check for the hazard.
DenseSet<const MachineBasicBlock *> Visited;
int WaitStates = ::getWaitStatesSince(IsHazardFn, MI->getParent(),
std::next(MI->getReverseIterator()), 0,
IsExpiredFn, Visited, WaitStatesFn);
IsExpiredFn, WaitStatesFn);

if (WaitStates >= SALUExpiryCount)
return false;
Expand Down