Skip to content

Commit eb110ab

Browse files
committed
[AIEX] premisched: do not delay SUs if this can form a cycle
We now check whether an SU can be safely delayed without forming a cycle of SUs delaying each other indefinitely.
1 parent 6096503 commit eb110ab

File tree

3 files changed

+94
-2
lines changed

3 files changed

+94
-2
lines changed

llvm/lib/Target/AIE/AIEMachineScheduler.cpp

Lines changed: 35 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -80,6 +80,11 @@ static cl::opt<bool>
8080
InterBlockAlignment("aie-interblock-alignment", cl::init(true),
8181
cl::desc("Allow for alignment of successor blocks"));
8282

83+
namespace {
/// Marker stored in place of an SUnit number when no SUnit is known.
/// Its value is the all-ones bit pattern of `unsigned`.
constexpr unsigned UnknownSUNum = ~0U;
} // namespace
87+
8388
static AIEHazardRecognizer *getAIEHazardRecognizer(const SchedBoundary &Zone) {
8489
return static_cast<AIEHazardRecognizer *>(Zone.HazardRec);
8590
}
@@ -771,6 +776,7 @@ void AIEPreRASchedStrategy::enterRegion(MachineBasicBlock *BB,
771776
CurMBB = BB;
772777
RegionBegin = Begin;
773778
RegionEnd = End;
779+
SUDelayerMap.resize(std::distance(Begin, End), UnknownSUNum);
774780
}
775781

776782
void AIEPreRASchedStrategy::leaveRegion(const SUnit &ExitSU) {
@@ -795,6 +801,7 @@ void AIEPreRASchedStrategy::leaveRegion(const SUnit &ExitSU) {
795801
CurMBB = nullptr;
796802
RegionBegin = nullptr;
797803
RegionEnd = nullptr;
804+
SUDelayerMap.clear();
798805
}
799806

800807
PressureDiff estimatePressureDiff(const SUnit &SU,
@@ -883,8 +890,34 @@ bool AIEPreRASchedStrategy::isAvailableNode(SUnit &SU, SchedBoundary &Zone,
883890

884891
// The node will likely cause a spill, only consider it schedule-able if
885892
// there is no pending node that can reduce the register pressure.
886-
return findPressureReducer(WorstPC.getPSet(), Zone.Pending.elements(),
887-
BotRPT) == nullptr;
893+
if (const SUnit *PendingPressureReducer = findPressureReducer(
894+
WorstPC.getPSet(), Zone.Pending.elements(), BotRPT);
895+
PendingPressureReducer && canBeDelayed(SU, *PendingPressureReducer)) {
896+
LLVM_DEBUG(dbgs() << "** Delaying SU(" << SU.NodeNum << "): Waiting for SU("
897+
<< PendingPressureReducer->NodeNum << ")\n");
898+
899+
// Keep track of PendingPressureReducer to avoid cycles of SUs
900+
// delaying each other.
901+
SUDelayerMap[SU.NodeNum] = PendingPressureReducer->NodeNum;
902+
return false;
903+
}
904+
905+
// Can't prove a pending SU will help reduce reg pressure, keep as available.
906+
return true;
907+
}
908+
909+
bool AIEPreRASchedStrategy::canBeDelayed(const SUnit &DelayedSU,
910+
const SUnit &Delayer) const {
911+
std::function<bool(unsigned)> Impl = [&](unsigned SUNum) {
912+
if (SUNum == UnknownSUNum)
913+
return true;
914+
if (SUNum == DelayedSU.NodeNum)
915+
return false;
916+
return Impl(SUDelayerMap[SUNum]);
917+
};
918+
// If SU is delayed by another instruction that is eventually waiting on SU
919+
// itself, do not keep delaying SU otherwise this creates an infinite loop.
920+
return Impl(Delayer.NodeNum);
888921
}
889922

890923
bool AIEPreRASchedStrategy::tryCandidate(SchedCandidate &Cand,

llvm/lib/Target/AIE/AIEMachineScheduler.h

Lines changed: 9 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -166,6 +166,10 @@ class AIEPreRASchedStrategy : public GenericScheduler {
166166
bool VerifyReadyCycle) override;
167167

168168
protected:
169+
/// Whether \p DelayedSU can be safely delayed without forming a cycle
170+
/// of SUs delaying each other indefinitely.
171+
bool canBeDelayed(const SUnit &DelayedSU, const SUnit &Delayer) const;
172+
169173
/// Apply a set of heuristics to a new candidate for scheduling.
170174
///
171175
/// \param Cand provides the policy and current best candidate.
@@ -180,6 +184,11 @@ class AIEPreRASchedStrategy : public GenericScheduler {
180184
MachineBasicBlock *CurMBB = nullptr;
181185
MachineBasicBlock::iterator RegionBegin = nullptr;
182186
MachineBasicBlock::iterator RegionEnd = nullptr;
187+
188+
/// Keeps track of SUs that have been delayed, waiting on another
189+
/// pressure-reducing SU to be scheduled first.
190+
/// SUDelayerMap[0] = 2 means that SU(0) is waiting on SU(2).
191+
std::vector<unsigned> SUDelayerMap;
183192
};
184193

185194
/// An extension to ScheduleDAGMI that provides callbacks on region entry/exit
Lines changed: 50 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,50 @@
1+
# This file is licensed under the Apache License v2.0 with LLVM Exceptions.
2+
# See https://llvm.org/LICENSE.txt for license information.
3+
# SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
4+
#
5+
# (c) Copyright 2024 Advanced Micro Devices, Inc. or its affiliates
6+
7+
# RUN: llc -march=aie2 -run-pass=machine-scheduler %s -o -
8+
9+
10+
# This tests a complex case where the register pressure is critical for
11+
# both accumulators and vectors. In bb.1, the scheduler will try to delay
12+
# scheduling "%31:vec256 = VCONV_BF16_FP32 %21:acc512" because it increases the
13+
# pressure on accumulators and "%30:acc512 = VCONV_FP32_BF16 %20:vec256" can
14+
# help reduce that pressure. But the latter increases the pressure on vectors
15+
# so the scheduler will try to delay it because the former can help reduce that
16+
# pressure on vectors.
17+
# This can lead to an infinite loop if the scheduler is not careful.
18+
---
19+
name: tied_pressure_reducers
20+
tracksRegLiveness: true
21+
body: |
22+
bb.0.entry:
23+
liveins: $y2, $bml0, $wl0
24+
25+
%1:vec1024 = COPY $y2
26+
%2:vec1024 = COPY $y2
27+
%3:vec1024 = COPY $y2
28+
%4:vec1024 = COPY $y2
29+
%5:vec1024 = COPY $y2
30+
%6:vec1024 = COPY $y2
31+
%11:acc1024 = COPY $y2
32+
%12:acc1024 = COPY $y2
33+
%13:acc1024 = COPY $y2
34+
%14:acc1024 = COPY $y2
35+
%15:acc1024 = COPY $y2
36+
%16:acc1024 = COPY $y2
37+
%17:acc1024 = COPY $y2
38+
%18:acc1024 = COPY $y2
39+
%19:acc1024 = COPY $y2
40+
41+
%20:vec256 = COPY $wl0
42+
%21:acc512 = COPY $bml0
43+
PseudoJ_jump_imm %bb.1
44+
45+
bb.1:
46+
%30:acc512 = VCONV_FP32_BF16 %20
47+
%31:vec256 = VCONV_BF16_FP32 %21, implicit-def $srf2fflags, implicit $crf2fmask, implicit $crrnd
48+
49+
PseudoRET implicit $lr, implicit %30, implicit %31, implicit %1, implicit %2, implicit %3, implicit %4, implicit %5, implicit %6, implicit %11, implicit %12, implicit %13, implicit %14, implicit %15, implicit %16, implicit %17, implicit %18, implicit %19
50+
...

0 commit comments

Comments
 (0)