Skip to content

Commit 711a295

Browse files
authored
[AMDGPU] Ignore wavefront barrier latency during scheduling DAG mutation (#168500)
Do not add latency for wavefront and singlethread scope fences during barrier latency DAG mutation. Fences at these scopes do not typically introduce any latency, and adjusting schedules based on them significantly degrades latency hiding.
1 parent fddfc70 commit 711a295

File tree

4 files changed

+301
-11
lines changed

4 files changed

+301
-11
lines changed

llvm/lib/Target/AMDGPU/AMDGPUBarrierLatency.cpp

Lines changed: 17 additions & 5 deletions
Original file line numberDiff line numberDiff line change
@@ -27,8 +27,17 @@ using namespace llvm;
2727
namespace {
2828

2929
class BarrierLatency : public ScheduleDAGMutation {
30+
private:
31+
SmallSet<SyncScope::ID, 4> IgnoredScopes;
32+
3033
public:
31-
BarrierLatency() = default;
34+
BarrierLatency(MachineFunction *MF) {
35+
LLVMContext &Context = MF->getFunction().getContext();
36+
IgnoredScopes.insert(SyncScope::SingleThread);
37+
IgnoredScopes.insert(Context.getOrInsertSyncScopeID("wavefront"));
38+
IgnoredScopes.insert(Context.getOrInsertSyncScopeID("wavefront-one-as"));
39+
IgnoredScopes.insert(Context.getOrInsertSyncScopeID("singlethread-one-as"));
40+
}
3241
void apply(ScheduleDAGInstrs *DAG) override;
3342
};
3443

@@ -40,8 +49,11 @@ void BarrierLatency::apply(ScheduleDAGInstrs *DAG) {
4049
continue;
4150

4251
// Update latency on barrier edges of ATOMIC_FENCE.
43-
// We don't consider the scope of the fence or type of instruction
44-
// involved in the barrier edge.
52+
// Ignore scopes not expected to have any latency.
53+
SyncScope::ID SSID = static_cast<SyncScope::ID>(MI->getOperand(1).getImm());
54+
if (IgnoredScopes.contains(SSID))
55+
continue;
56+
4557
for (SDep &PredDep : SU.Preds) {
4658
if (!PredDep.isBarrier())
4759
continue;
@@ -68,6 +80,6 @@ void BarrierLatency::apply(ScheduleDAGInstrs *DAG) {
6880
} // end namespace
6981

7082
std::unique_ptr<ScheduleDAGMutation>
71-
llvm::createAMDGPUBarrierLatencyDAGMutation() {
72-
return std::make_unique<BarrierLatency>();
83+
llvm::createAMDGPUBarrierLatencyDAGMutation(MachineFunction *MF) {
84+
return std::make_unique<BarrierLatency>(MF);
7385
}

llvm/lib/Target/AMDGPU/AMDGPUBarrierLatency.h

Lines changed: 4 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -14,7 +14,10 @@
1414

1515
namespace llvm {
1616

17-
std::unique_ptr<ScheduleDAGMutation> createAMDGPUBarrierLatencyDAGMutation();
17+
class MachineFunction;
18+
19+
std::unique_ptr<ScheduleDAGMutation>
20+
createAMDGPUBarrierLatencyDAGMutation(MachineFunction *MF);
1821

1922
} // namespace llvm
2023

llvm/lib/Target/AMDGPU/AMDGPUTargetMachine.cpp

Lines changed: 3 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -647,7 +647,7 @@ createGCNMaxOccupancyMachineScheduler(MachineSchedContext *C) {
647647
DAG->addMutation(createIGroupLPDAGMutation(AMDGPU::SchedulingPhase::Initial));
648648
DAG->addMutation(createAMDGPUMacroFusionDAGMutation());
649649
DAG->addMutation(createAMDGPUExportClusteringDAGMutation());
650-
DAG->addMutation(createAMDGPUBarrierLatencyDAGMutation());
650+
DAG->addMutation(createAMDGPUBarrierLatencyDAGMutation(C->MF));
651651
return DAG;
652652
}
653653

@@ -668,7 +668,7 @@ createGCNMaxMemoryClauseMachineScheduler(MachineSchedContext *C) {
668668
if (ST.shouldClusterStores())
669669
DAG->addMutation(createStoreClusterDAGMutation(DAG->TII, DAG->TRI));
670670
DAG->addMutation(createAMDGPUExportClusteringDAGMutation());
671-
DAG->addMutation(createAMDGPUBarrierLatencyDAGMutation());
671+
DAG->addMutation(createAMDGPUBarrierLatencyDAGMutation(C->MF));
672672
return DAG;
673673
}
674674

@@ -1209,7 +1209,7 @@ GCNTargetMachine::createPostMachineScheduler(MachineSchedContext *C) const {
12091209
EnableVOPD)
12101210
DAG->addMutation(createVOPDPairingMutation());
12111211
DAG->addMutation(createAMDGPUExportClusteringDAGMutation());
1212-
DAG->addMutation(createAMDGPUBarrierLatencyDAGMutation());
1212+
DAG->addMutation(createAMDGPUBarrierLatencyDAGMutation(C->MF));
12131213
return DAG;
12141214
}
12151215
//===----------------------------------------------------------------------===//

0 commit comments

Comments
 (0)