Skip to content

Commit fbbd3d0

Browse files
committed
[AMDGPU]: Fall back to default mutations when iglp is not applied
Change-Id: I2e1f4f4610275d3d629c2b34ced331d78ea0ca06
1 parent 93afd8f commit fbbd3d0

File tree

5 files changed

+53
-15
lines changed

5 files changed

+53
-15
lines changed

llvm/lib/Target/AMDGPU/AMDGPUIGroupLP.cpp

Lines changed: 17 additions & 4 deletions
Original file line numberDiff line numberDiff line change
@@ -2348,6 +2348,8 @@ class IGroupLPDAGMutation : public ScheduleDAGMutation {
23482348

23492349
ScheduleDAGMI *DAG;
23502350

2351+
std::vector<std::unique_ptr<ScheduleDAGMutation>> *SavedMutations;
2352+
23512353
// Organize lists of SchedGroups by their SyncID. SchedGroups /
23522354
// SCHED_GROUP_BARRIERs with different SyncIDs will have no edges added
23532355
// between them.
@@ -2390,7 +2392,10 @@ class IGroupLPDAGMutation : public ScheduleDAGMutation {
23902392
AMDGPU::SchedulingPhase Phase = AMDGPU::SchedulingPhase::Initial;
23912393

23922394
IGroupLPDAGMutation() = default;
2393-
IGroupLPDAGMutation(AMDGPU::SchedulingPhase Phase) : Phase(Phase) {}
2395+
IGroupLPDAGMutation(
2396+
AMDGPU::SchedulingPhase Phase,
2397+
std::vector<std::unique_ptr<ScheduleDAGMutation>> *SavedMutations)
2398+
: SavedMutations(SavedMutations), Phase(Phase) {}
23942399
};
23952400

23962401
unsigned SchedGroup::NumSchedGroups = 0;
@@ -2607,6 +2612,13 @@ void IGroupLPDAGMutation::apply(ScheduleDAGInstrs *DAGInstrs) {
26072612
PS.solve();
26082613
return;
26092614
}
2615+
2616+
if (!SavedMutations)
2617+
return;
2618+
2619+
// We did not apply a mutation, fall back to SavedMutations
2620+
for (auto &m : *SavedMutations)
2621+
m->apply(DAG);
26102622
}
26112623

26122624
void IGroupLPDAGMutation::addSchedBarrierEdges(SUnit &SchedBarrier) {
@@ -2703,9 +2715,10 @@ namespace llvm {
27032715
/// same scheduling region (e.g. pre and post-RA scheduling / multiple
27042716
/// scheduling "phases"), we can reenter this mutation framework more than once
27052717
/// for a given region.
2706-
std::unique_ptr<ScheduleDAGMutation>
2707-
createIGroupLPDAGMutation(AMDGPU::SchedulingPhase Phase) {
2708-
return std::make_unique<IGroupLPDAGMutation>(Phase);
2718+
std::unique_ptr<ScheduleDAGMutation> createIGroupLPDAGMutation(
2719+
AMDGPU::SchedulingPhase Phase,
2720+
std::vector<std::unique_ptr<ScheduleDAGMutation>> *SavedMutations) {
2721+
return std::make_unique<IGroupLPDAGMutation>(Phase, SavedMutations);
27092722
}
27102723

27112724
} // end namespace llvm

llvm/lib/Target/AMDGPU/AMDGPUIGroupLP.h

Lines changed: 3 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -20,8 +20,9 @@ namespace AMDGPU {
2020
enum class SchedulingPhase { Initial, PreRAReentry, PostRA };
2121
} // namespace AMDGPU
2222

23-
std::unique_ptr<ScheduleDAGMutation>
24-
createIGroupLPDAGMutation(AMDGPU::SchedulingPhase Phase);
23+
std::unique_ptr<ScheduleDAGMutation> createIGroupLPDAGMutation(
24+
AMDGPU::SchedulingPhase Phase,
25+
std::vector<std::unique_ptr<ScheduleDAGMutation>> *SavedMutations);
2526

2627
} // namespace llvm
2728

llvm/lib/Target/AMDGPU/AMDGPUTargetMachine.cpp

Lines changed: 5 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -579,7 +579,8 @@ createGCNMaxOccupancyMachineScheduler(MachineSchedContext *C) {
579579
DAG->addMutation(createLoadClusterDAGMutation(DAG->TII, DAG->TRI));
580580
if (ST.shouldClusterStores())
581581
DAG->addMutation(createStoreClusterDAGMutation(DAG->TII, DAG->TRI));
582-
DAG->addMutation(createIGroupLPDAGMutation(AMDGPU::SchedulingPhase::Initial));
582+
DAG->addMutation(
583+
createIGroupLPDAGMutation(AMDGPU::SchedulingPhase::Initial, nullptr));
583584
DAG->addMutation(createAMDGPUMacroFusionDAGMutation());
584585
DAG->addMutation(createAMDGPUExportClusteringDAGMutation());
585586
return DAG;
@@ -589,7 +590,8 @@ static ScheduleDAGInstrs *
589590
createGCNMaxILPMachineScheduler(MachineSchedContext *C) {
590591
ScheduleDAGMILive *DAG =
591592
new GCNScheduleDAGMILive(C, std::make_unique<GCNMaxILPSchedStrategy>(C));
592-
DAG->addMutation(createIGroupLPDAGMutation(AMDGPU::SchedulingPhase::Initial));
593+
DAG->addMutation(
594+
createIGroupLPDAGMutation(AMDGPU::SchedulingPhase::Initial, nullptr));
593595
return DAG;
594596
}
595597

@@ -1097,7 +1099,7 @@ GCNTargetMachine::createPostMachineScheduler(MachineSchedContext *C) const {
10971099
DAG->addMutation(createLoadClusterDAGMutation(DAG->TII, DAG->TRI));
10981100
if (ST.shouldClusterStores())
10991101
DAG->addMutation(createStoreClusterDAGMutation(DAG->TII, DAG->TRI));
1100-
DAG->addMutation(createIGroupLPDAGMutation(AMDGPU::SchedulingPhase::PostRA));
1102+
DAG->addMutation(createIGroupLPDAGMutation(AMDGPU::SchedulingPhase::PostRA, nullptr));
11011103
if ((EnableVOPD.getNumOccurrences() ||
11021104
getOptLevel() >= CodeGenOptLevel::Less) &&
11031105
EnableVOPD)

llvm/lib/Target/AMDGPU/GCNSchedStrategy.cpp

Lines changed: 8 additions & 6 deletions
Original file line numberDiff line numberDiff line change
@@ -1048,8 +1048,8 @@ bool UnclusteredHighRPStage::initGCNSchedStage() {
10481048
return false;
10491049

10501050
SavedMutations.swap(DAG.Mutations);
1051-
DAG.addMutation(
1052-
createIGroupLPDAGMutation(AMDGPU::SchedulingPhase::PreRAReentry));
1051+
DAG.addMutation(createIGroupLPDAGMutation(
1052+
AMDGPU::SchedulingPhase::PreRAReentry, nullptr));
10531053

10541054
InitialOccupancy = DAG.MinOccupancy;
10551055
// Aggressively try to reduce register pressure in the unclustered high RP
@@ -1191,7 +1191,8 @@ bool GCNSchedStage::initGCNRegion() {
11911191
StageID == GCNSchedStageID::ILPInitialSchedule;
11921192
DAG.addMutation(createIGroupLPDAGMutation(
11931193
IsInitialStage ? AMDGPU::SchedulingPhase::Initial
1194-
: AMDGPU::SchedulingPhase::PreRAReentry));
1194+
: AMDGPU::SchedulingPhase::PreRAReentry,
1195+
&SavedMutations));
11951196
}
11961197

11971198
return true;
@@ -2062,15 +2063,16 @@ void GCNPostScheduleDAGMILive::schedule() {
20622063
if (HasIGLPInstrs) {
20632064
SavedMutations.clear();
20642065
SavedMutations.swap(Mutations);
2065-
addMutation(createIGroupLPDAGMutation(AMDGPU::SchedulingPhase::PostRA));
2066+
addMutation(createIGroupLPDAGMutation(AMDGPU::SchedulingPhase::PostRA,
2067+
&SavedMutations));
20662068
}
20672069

20682070
ScheduleDAGMI::schedule();
2069-
}
20702071

2071-
void GCNPostScheduleDAGMILive::finalizeSchedule() {
20722072
if (HasIGLPInstrs)
20732073
SavedMutations.swap(Mutations);
2074+
}
20742075

2076+
void GCNPostScheduleDAGMILive::finalizeSchedule() {
20752077
ScheduleDAGMI::finalizeSchedule();
20762078
}

llvm/test/CodeGen/AMDGPU/cluster-flat-loads.mir

Lines changed: 20 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -19,3 +19,23 @@ body: |
1919
%2 = V_ADD_F32_e64 0, killed %1, 0, 1, 0, 0, implicit $mode, implicit $exec
2020
%3 = FLAT_LOAD_DWORD %0, 4, 0, implicit $exec, implicit $flat_scr :: (load (s32))
2121
...
22+
---
23+
# GCN-LABEL: name: cluster_flat_loads_iglp_opt
24+
# GCN: FLAT_LOAD_DWORD %0, 0
25+
# GCN-NEXT: FLAT_LOAD_DWORD %0, 4
26+
# GCN-NEXT: V_ADD_F32_e64
27+
name: cluster_flat_loads_iglp_opt
28+
tracksRegLiveness: true
29+
registers:
30+
- { id: 0, class: vreg_64 }
31+
- { id: 1, class: vgpr_32 }
32+
- { id: 2, class: vgpr_32 }
33+
- { id: 3, class: vgpr_32 }
34+
body: |
35+
bb.0:
36+
%0 = IMPLICIT_DEF
37+
%1 = FLAT_LOAD_DWORD %0, 0, 0, implicit $exec, implicit $flat_scr :: (load (s32))
38+
%2 = V_ADD_F32_e64 0, killed %1, 0, 1, 0, 0, implicit $mode, implicit $exec
39+
%3 = FLAT_LOAD_DWORD %0, 4, 0, implicit $exec, implicit $flat_scr :: (load (s32))
40+
IGLP_OPT 2
41+
...

0 commit comments

Comments
 (0)