Skip to content

Commit 0993c38

Browse files
committed
[AMDGPU] Add SchedGroupMask::PACK
1 parent db3d077 commit 0993c38

File tree

1 file changed

+26 -15 lines changed

1 file changed

+26 -15 lines changed

llvm/lib/Target/AMDGPU/AMDGPUIGroupLP.cpp

Lines changed: 26 additions & 15 deletions
Original file line number | Diff line number | Diff line change
@@ -76,8 +76,9 @@ enum class SchedGroupMask {
7676
DS_READ = 1u << 8,
7777
DS_WRITE = 1u << 9,
7878
TRANS = 1u << 10,
79+
PACK = 1u << 11,
7980
ALL = ALU | VALU | SALU | MFMA | VMEM | VMEM_READ | VMEM_WRITE | DS |
80-
DS_READ | DS_WRITE | TRANS,
81+
DS_READ | DS_WRITE | TRANS | PACK,
8182
LLVM_MARK_AS_BITMASK_ENUM(/* LargestFlag = */ ALL)
8283
};
8384

@@ -112,7 +113,7 @@ class InstructionRule {
112113
virtual ~InstructionRule() = default;
113114
};
114115

115-
using SUnitsToCandidateSGsMap = DenseMap<SUnit *, SmallVector<int, 4>>;
116+
typedef DenseMap<SUnit *, SmallVector<int, 4>> SUnitsToCandidateSGsMap;
116117

117118
// Classify instructions into groups to enable fine tuned control over the
118119
// scheduler. These groups may be more specific than current SchedModel
@@ -190,9 +191,13 @@ class SchedGroup {
190191
// Returns true if the SU matches all rules
191192
bool allowedByRules(const SUnit *SU,
192193
SmallVectorImpl<SchedGroup> &SyncPipe) const {
193-
for (auto &Rule : Rules) {
194-
if (!Rule.get()->apply(SU, Collection, SyncPipe))
194+
if (Rules.empty())
195+
return true;
196+
for (size_t I = 0; I < Rules.size(); I++) {
197+
auto TheRule = Rules[I].get();
198+
if (!TheRule->apply(SU, Collection, SyncPipe)) {
195199
return false;
200+
}
196201
}
197202
return true;
198203
}
@@ -240,8 +245,8 @@ class SchedGroup {
240245
}
241246
};
242247

243-
using SUToCandSGsPair = std::pair<SUnit *, SmallVector<int, 4>>;
244-
using SUsToCandSGsVec = SmallVector<SUToCandSGsPair, 4>;
248+
typedef std::pair<SUnit *, SmallVector<int, 4>> SUToCandSGsPair;
249+
typedef SmallVector<SUToCandSGsPair, 4> SUsToCandSGsVec;
245250

246251
// The PipelineSolver is used to assign SUnits to SchedGroups in a pipeline
247252
// in non-trivial cases. For example, if the requested pipeline is
@@ -290,7 +295,7 @@ class PipelineSolver {
290295
uint64_t BranchesExplored = 0;
291296

292297
// The direction in which we process the candidate SchedGroups per SU
293-
bool IsBottomUp = true;
298+
bool IsBottomUp = 1;
294299

295300
// Update indices to fit next conflicting instruction
296301
void advancePosition();
@@ -344,7 +349,7 @@ class PipelineSolver {
344349

345350
PipelineSolver(DenseMap<int, SmallVector<SchedGroup, 4>> &SyncedSchedGroups,
346351
DenseMap<int, SUnitsToCandidateSGsMap> &SyncedInstrs,
347-
ScheduleDAGMI *DAG, bool IsBottomUp = true)
352+
ScheduleDAGMI *DAG, bool IsBottomUp = 1)
348353
: DAG(DAG), SyncedInstrs(SyncedInstrs),
349354
SyncedSchedGroups(SyncedSchedGroups), IsBottomUp(IsBottomUp) {
350355

@@ -836,7 +841,7 @@ class IGLPStrategy {
836841
virtual bool shouldApplyStrategy(ScheduleDAGInstrs *DAG,
837842
AMDGPU::SchedulingPhase Phase) = 0;
838843

839-
bool IsBottomUp = true;
844+
bool IsBottomUp = 1;
840845

841846
IGLPStrategy(ScheduleDAGInstrs *DAG, const SIInstrInfo *TII)
842847
: DAG(DAG), TII(TII) {}
@@ -859,7 +864,7 @@ class MFMASmallGemmOpt final : public IGLPStrategy {
859864

860865
MFMASmallGemmOpt(ScheduleDAGInstrs *DAG, const SIInstrInfo *TII)
861866
: IGLPStrategy(DAG, TII) {
862-
IsBottomUp = true;
867+
IsBottomUp = 1;
863868
}
864869
};
865870

@@ -1328,7 +1333,7 @@ class MFMAExpInterleaveOpt final : public IGLPStrategy {
13281333

13291334
MFMAExpInterleaveOpt(ScheduleDAGInstrs *DAG, const SIInstrInfo *TII)
13301335
: IGLPStrategy(DAG, TII) {
1331-
IsBottomUp = false;
1336+
IsBottomUp = 0;
13321337
}
13331338
};
13341339

@@ -1460,7 +1465,9 @@ bool MFMAExpInterleaveOpt::analyzeDAG(const SIInstrInfo *TII) {
14601465

14611466
MFMAChains = 0;
14621467
for (auto &MFMAPipeSU : MFMAPipeSUs) {
1463-
if (is_contained(MFMAChainSeeds, MFMAPipeSU))
1468+
if (MFMAChainSeeds.size() &&
1469+
std::find(MFMAChainSeeds.begin(), MFMAChainSeeds.end(), MFMAPipeSU) !=
1470+
MFMAChainSeeds.end())
14641471
continue;
14651472
if (!std::any_of(MFMAPipeSU->Preds.begin(), MFMAPipeSU->Preds.end(),
14661473
[&TII](SDep &Succ) {
@@ -2039,7 +2046,7 @@ class MFMASmallGemmSingleWaveOpt final : public IGLPStrategy {
20392046

20402047
MFMASmallGemmSingleWaveOpt(ScheduleDAGInstrs *DAG, const SIInstrInfo *TII)
20412048
: IGLPStrategy(DAG, TII) {
2042-
IsBottomUp = false;
2049+
IsBottomUp = 0;
20432050
}
20442051
};
20452052

@@ -2349,7 +2356,7 @@ class IGroupLPDAGMutation : public ScheduleDAGMutation {
23492356
// created SchedGroup first, and will consider that as the ultimate
23502357
// predecessor group when linking. TOP_DOWN instead links and processes the
23512358
// first created SchedGroup first.
2352-
bool IsBottomUp = true;
2359+
bool IsBottomUp = 1;
23532360

23542361
// The scheduling phase this application of IGLP corresponds with.
23552362
AMDGPU::SchedulingPhase Phase = AMDGPU::SchedulingPhase::Initial;
@@ -2420,6 +2427,10 @@ bool SchedGroup::canAddMI(const MachineInstr &MI) const {
24202427
TII->isTRANS(MI))
24212428
Result = true;
24222429

2430+
else if (((SGMask & SchedGroupMask::PACK) != SchedGroupMask::NONE) &&
2431+
TII->isVOP3P(MI) && !TII->isMFMAorWMMA(MI))
2432+
Result = true;
2433+
24232434
LLVM_DEBUG(
24242435
dbgs() << "For SchedGroup with mask " << format_hex((int)SGMask, 10, true)
24252436
<< (Result ? " could classify " : " unable to classify ") << MI);
@@ -2444,7 +2455,7 @@ int SchedGroup::link(SUnit &SU, bool MakePred,
24442455
// the A->B edge impossible, otherwise it returns true;
24452456
bool Added = tryAddEdge(A, B);
24462457
if (Added)
2447-
AddedEdges.emplace_back(A, B);
2458+
AddedEdges.push_back(std::pair(A, B));
24482459
else
24492460
++MissedEdges;
24502461
}

0 commit comments

Comments
 (0)