@@ -76,8 +76,9 @@ enum class SchedGroupMask {
7676 DS_READ = 1u << 8 ,
7777 DS_WRITE = 1u << 9 ,
7878 TRANS = 1u << 10 ,
79+ PACK = 1u << 11 ,
7980 ALL = ALU | VALU | SALU | MFMA | VMEM | VMEM_READ | VMEM_WRITE | DS |
80- DS_READ | DS_WRITE | TRANS,
81+ DS_READ | DS_WRITE | TRANS | PACK ,
8182 LLVM_MARK_AS_BITMASK_ENUM (/* LargestFlag = */ ALL)
8283};
8384
@@ -112,7 +113,7 @@ class InstructionRule {
112113 virtual ~InstructionRule () = default ;
113114};
114115
115- using SUnitsToCandidateSGsMap = DenseMap<SUnit *, SmallVector<int , 4 >>;
116+ typedef DenseMap<SUnit *, SmallVector<int , 4 >> SUnitsToCandidateSGsMap ;
116117
117118// Classify instructions into groups to enable fine tuned control over the
118119// scheduler. These groups may be more specific than current SchedModel
@@ -190,9 +191,13 @@ class SchedGroup {
190191 // Returns true if the SU matches all rules
191192 bool allowedByRules (const SUnit *SU,
192193 SmallVectorImpl<SchedGroup> &SyncPipe) const {
193- for (auto &Rule : Rules) {
194- if (!Rule.get ()->apply (SU, Collection, SyncPipe))
194+ if (Rules.empty ())
195+ return true ;
196+ for (size_t I = 0 ; I < Rules.size (); I++) {
197+ auto TheRule = Rules[I].get ();
198+ if (!TheRule->apply (SU, Collection, SyncPipe)) {
195199 return false ;
200+ }
196201 }
197202 return true ;
198203 }
@@ -240,8 +245,8 @@ class SchedGroup {
240245 }
241246};
242247
243- using SUToCandSGsPair = std::pair<SUnit *, SmallVector<int , 4 >>;
244- using SUsToCandSGsVec = SmallVector<SUToCandSGsPair, 4 >;
248+ typedef std::pair<SUnit *, SmallVector<int , 4 >> SUToCandSGsPair ;
249+ typedef SmallVector<SUToCandSGsPair, 4 > SUsToCandSGsVec ;
245250
246251// The PipelineSolver is used to assign SUnits to SchedGroups in a pipeline
247252// in non-trivial cases. For example, if the requested pipeline is
@@ -290,7 +295,7 @@ class PipelineSolver {
290295 uint64_t BranchesExplored = 0 ;
291296
// The direction in which we process the candidate SchedGroups per SU
bool IsBottomUp = true;
294299
295300 // Update indices to fit next conflicting instruction
296301 void advancePosition ();
@@ -344,7 +349,7 @@ class PipelineSolver {
344349
345350 PipelineSolver (DenseMap<int , SmallVector<SchedGroup, 4 >> &SyncedSchedGroups,
346351 DenseMap<int , SUnitsToCandidateSGsMap> &SyncedInstrs,
347- ScheduleDAGMI *DAG, bool IsBottomUp = true )
352+ ScheduleDAGMI *DAG, bool IsBottomUp = 1 )
348353 : DAG(DAG), SyncedInstrs(SyncedInstrs),
349354 SyncedSchedGroups (SyncedSchedGroups), IsBottomUp(IsBottomUp) {
350355
@@ -836,7 +841,7 @@ class IGLPStrategy {
836841 virtual bool shouldApplyStrategy (ScheduleDAGInstrs *DAG,
837842 AMDGPU::SchedulingPhase Phase) = 0;
838843
// Direction flag for the strategy. Defaults to bottom-up; derived strategies
// reassign it in their constructors.
bool IsBottomUp = true;
840845
// Stores the given DAG and TII pointers in the members of the same name; the
// constructor body itself is empty.
841846 IGLPStrategy (ScheduleDAGInstrs *DAG, const SIInstrInfo *TII)
842847 : DAG(DAG), TII(TII) {}
@@ -859,7 +864,7 @@ class MFMASmallGemmOpt final : public IGLPStrategy {
859864
860865 MFMASmallGemmOpt (ScheduleDAGInstrs *DAG, const SIInstrInfo *TII)
861866 : IGLPStrategy(DAG, TII) {
862- IsBottomUp = true ;
867+ IsBottomUp = 1 ;
863868 }
864869};
865870
@@ -1328,7 +1333,7 @@ class MFMAExpInterleaveOpt final : public IGLPStrategy {
13281333
13291334 MFMAExpInterleaveOpt (ScheduleDAGInstrs *DAG, const SIInstrInfo *TII)
13301335 : IGLPStrategy(DAG, TII) {
1331- IsBottomUp = false ;
1336+ IsBottomUp = 0 ;
13321337 }
13331338};
13341339
@@ -1460,7 +1465,9 @@ bool MFMAExpInterleaveOpt::analyzeDAG(const SIInstrInfo *TII) {
14601465
14611466 MFMAChains = 0 ;
14621467 for (auto &MFMAPipeSU : MFMAPipeSUs) {
1463- if (is_contained (MFMAChainSeeds, MFMAPipeSU))
1468+ if (MFMAChainSeeds.size () &&
1469+ std::find (MFMAChainSeeds.begin (), MFMAChainSeeds.end (), MFMAPipeSU) !=
1470+ MFMAChainSeeds.end ())
14641471 continue ;
14651472 if (!std::any_of (MFMAPipeSU->Preds .begin (), MFMAPipeSU->Preds .end (),
14661473 [&TII](SDep &Succ) {
@@ -2039,7 +2046,7 @@ class MFMASmallGemmSingleWaveOpt final : public IGLPStrategy {
20392046
20402047 MFMASmallGemmSingleWaveOpt (ScheduleDAGInstrs *DAG, const SIInstrInfo *TII)
20412048 : IGLPStrategy(DAG, TII) {
2042- IsBottomUp = false ;
2049+ IsBottomUp = 0 ;
20432050 }
20442051};
20452052
@@ -2349,7 +2356,7 @@ class IGroupLPDAGMutation : public ScheduleDAGMutation {
23492356 // created SchedGroup first, and will consider that as the ultimate
23502357 // predecessor group when linking. TOP_DOWN instead links and processes the
23512358 // first created SchedGroup first.
bool IsBottomUp = true;
23532360
23542361 // The scheduling phase this application of IGLP corresponds with.
23552362 AMDGPU::SchedulingPhase Phase = AMDGPU::SchedulingPhase::Initial;
@@ -2420,6 +2427,10 @@ bool SchedGroup::canAddMI(const MachineInstr &MI) const {
24202427 TII->isTRANS (MI))
24212428 Result = true ;
24222429
2430+ else if (((SGMask & SchedGroupMask::PACK) != SchedGroupMask::NONE) &&
2431+ TII->isVOP3P (MI) && !TII->isMFMAorWMMA (MI))
2432+ Result = true ;
2433+
24232434 LLVM_DEBUG (
24242435 dbgs () << " For SchedGroup with mask " << format_hex ((int )SGMask, 10 , true )
24252436 << (Result ? " could classify " : " unable to classify " ) << MI);
@@ -2444,7 +2455,7 @@ int SchedGroup::link(SUnit &SU, bool MakePred,
24442455 // the A->B edge impossible, otherwise it returns true;
24452456 bool Added = tryAddEdge (A, B);
24462457 if (Added)
2447- AddedEdges.emplace_back ( A, B);
2458+ AddedEdges.push_back ( std::pair ( A, B) );
24482459 else
24492460 ++MissedEdges;
24502461 }
0 commit comments