1717#include " GCNSubtarget.h"
1818#include " llvm/ADT/SmallSet.h"
1919#include " llvm/CodeGen/MachineFunctionPass.h"
20+ #include < deque>
2021
2122using namespace llvm ;
2223
@@ -50,6 +51,7 @@ class SIPostRABundler {
5051 bool run (MachineFunction &MF);
5152
5253private:
54+ const SIInstrInfo *TII = nullptr ;
5355 const SIRegisterInfo *TRI;
5456
5557 SmallSet<Register, 16 > Defs;
@@ -60,6 +62,9 @@ class SIPostRABundler {
6062 bool isBundleCandidate (const MachineInstr &MI) const ;
6163 bool isDependentLoad (const MachineInstr &MI) const ;
6264 bool canBundle (const MachineInstr &MI, const MachineInstr &NextMI) const ;
65+ void reorderLoads (MachineBasicBlock &MBB,
66+ MachineBasicBlock::instr_iterator &BundleStart,
67+ MachineBasicBlock::instr_iterator Next);
6368};
6469
6570constexpr uint64_t MemFlags = SIInstrFlags::MTBUF | SIInstrFlags::MUBUF |
@@ -129,6 +134,141 @@ bool SIPostRABundler::canBundle(const MachineInstr &MI,
129134 !isDependentLoad (NextMI));
130135}
131136
137+ static Register getDef (MachineInstr &MI) {
138+ assert (MI.getNumExplicitDefs () > 0 );
139+ return MI.defs ().begin ()->getReg ();
140+ }
141+
142+ void SIPostRABundler::reorderLoads (
143+ MachineBasicBlock &MBB, MachineBasicBlock::instr_iterator &BundleStart,
144+ MachineBasicBlock::instr_iterator Next) {
145+ // Don't reorder ALU, store or scalar clauses.
146+ if (!BundleStart->mayLoad () || BundleStart->mayStore () ||
147+ SIInstrInfo::isSMRD (*BundleStart) || !BundleStart->getNumExplicitDefs ())
148+ return ;
149+
150+ // Search to find the usage distance of each defined register in the clause.
151+ const unsigned SearchDistance = std::max (Defs.size (), 100UL );
152+ SmallDenseMap<Register, unsigned > UseDistance;
153+ unsigned MaxDistance = 0 ;
154+ for (MachineBasicBlock::iterator SearchI = Next;
155+ SearchI != MBB.end () && MaxDistance < SearchDistance &&
156+ UseDistance.size () < Defs.size ();
157+ ++SearchI, ++MaxDistance) {
158+ for (Register Reg : Defs) {
159+ if (UseDistance.contains (Reg))
160+ continue ;
161+ if (SearchI->readsRegister (Reg, TRI))
162+ UseDistance[Reg] = MaxDistance;
163+ }
164+ }
165+
166+ if (UseDistance.empty ())
167+ return ;
168+
169+ LLVM_DEBUG (dbgs () << " Try bundle reordering\n " );
170+
171+ // Build schedule based on use distance of register uses.
172+ // Attempt to preserve exist order (NativeOrder) where possible.
173+ std::deque<std::pair<MachineInstr *, unsigned >> Schedule;
174+ unsigned NativeOrder = 0 , LastOrder = 0 ;
175+ bool Reordered = false ;
176+ for (auto II = BundleStart; II != Next; ++II, ++NativeOrder) {
177+ // Bail out if we encounter anything that seems risky to reorder.
178+ if (!II->getNumExplicitDefs () || II->isKill () ||
179+ llvm::any_of (II->memoperands (), [&](const MachineMemOperand *MMO) {
180+ return MMO->isAtomic () || MMO->isVolatile ();
181+ })) {
182+ LLVM_DEBUG (dbgs () << " Abort\n " );
183+ return ;
184+ }
185+
186+ Register Reg = getDef (*II);
187+ unsigned NewOrder =
188+ UseDistance.contains (Reg) ? UseDistance[Reg] : MaxDistance;
189+ LLVM_DEBUG (dbgs () << " Order: " << NewOrder << " ," << NativeOrder
190+ << " , MI: " << *II);
191+ unsigned Order = (NewOrder << 16 | NativeOrder);
192+ Schedule.emplace_back (&*II, Order);
193+ Reordered |= Order < LastOrder;
194+ LastOrder = Order;
195+ }
196+
197+ // No reordering found.
198+ if (!Reordered) {
199+ LLVM_DEBUG (dbgs () << " No changes\n " );
200+ return ;
201+ }
202+
203+ // Apply sort on new ordering.
204+ std::sort (Schedule.begin (), Schedule.end (),
205+ [](std::pair<MachineInstr *, unsigned > A,
206+ std::pair<MachineInstr *, unsigned > B) {
207+ return A.second < B.second ;
208+ });
209+
210+ // Rebuild clause order.
211+ // Schedule holds ideal order for the load operations; however, each def
212+ // can only be scheduled when it will no longer clobber any uses.
213+ SmallVector<MachineInstr *> Clause;
214+ while (!Schedule.empty ()) {
215+ // Try to schedule next instruction in schedule.
216+ // Iterate until we find something that can be placed.
217+ auto It = Schedule.begin ();
218+ while (It != Schedule.end ()) {
219+ MachineInstr *MI = It->first ;
220+ LLVM_DEBUG (dbgs () << " Try schedule: " << *MI);
221+
222+ if (MI->getNumExplicitDefs () == 0 ) {
223+ // No defs, always schedule.
224+ LLVM_DEBUG (dbgs () << " Trivially OK\n " );
225+ break ;
226+ }
227+
228+ Register DefReg = getDef (*MI);
229+ bool DefRegHasUse = false ;
230+ for (auto SearchIt = std::next (It);
231+ SearchIt != Schedule.end () && !DefRegHasUse; ++SearchIt)
232+ DefRegHasUse = SearchIt->first ->readsRegister (DefReg, TRI);
233+ if (DefRegHasUse) {
234+ // A future use would be clobbered; try next instruction in the
235+ // schedule.
236+ LLVM_DEBUG (dbgs () << " Clobbers uses\n " );
237+ It++;
238+ continue ;
239+ }
240+
241+ // Safe to schedule.
242+ LLVM_DEBUG (dbgs () << " OK!\n " );
243+ break ;
244+ }
245+
246+ // Place schedule instruction into clause order.
247+ assert (It != Schedule.end ());
248+ MachineInstr *MI = It->first ;
249+ Schedule.erase (It);
250+ Clause.push_back (MI);
251+
252+ // Clear kill flags for later uses.
253+ for (auto &Use : MI->all_uses ()) {
254+ if (!Use.isReg () || !Use.isKill ())
255+ continue ;
256+ Register UseReg = Use.getReg ();
257+ if (llvm::any_of (Schedule, [&](std::pair<MachineInstr *, unsigned > &SI) {
258+ return SI.first ->readsRegister (UseReg, TRI);
259+ }))
260+ Use.setIsKill (false );
261+ }
262+ }
263+
264+ // Apply order to instructions.
265+ for (MachineInstr *MI : Clause)
266+ MI->moveBefore (&*Next);
267+
268+ // Update start of bundle.
269+ BundleStart = Clause[0 ]->getIterator ();
270+ }
271+
132272bool SIPostRABundlerLegacy::runOnMachineFunction (MachineFunction &MF) {
133273 if (skipFunction (MF.getFunction ()))
134274 return false ;
@@ -143,6 +283,8 @@ PreservedAnalyses SIPostRABundlerPass::run(MachineFunction &MF,
143283
144284bool SIPostRABundler::run (MachineFunction &MF) {
145285
286+ const GCNSubtarget &ST = MF.getSubtarget <GCNSubtarget>();
287+ TII = ST.getInstrInfo ();
146288 TRI = MF.getSubtarget <GCNSubtarget>().getRegisterInfo ();
147289 BitVector BundleUsedRegUnits (TRI->getNumRegUnits ());
148290 BitVector KillUsedRegUnits (TRI->getNumRegUnits ());
@@ -170,7 +312,7 @@ bool SIPostRABundler::run(MachineFunction &MF) {
170312 assert (Defs.empty ());
171313
172314 if (I->getNumExplicitDefs () != 0 )
173- Defs.insert (I-> defs (). begin ()-> getReg ( ));
315+ Defs.insert (getDef (*I ));
174316
175317 MachineBasicBlock::instr_iterator BundleStart = I;
176318 MachineBasicBlock::instr_iterator BundleEnd = I;
@@ -182,7 +324,7 @@ bool SIPostRABundler::run(MachineFunction &MF) {
182324 if (canBundle (*BundleEnd, *I)) {
183325 BundleEnd = I;
184326 if (I->getNumExplicitDefs () != 0 )
185- Defs.insert (I-> defs (). begin ()-> getReg ( ));
327+ Defs.insert (getDef (*I ));
186328 ++ClauseLength;
187329 } else if (!I->isMetaInstruction () ||
188330 I->getOpcode () == AMDGPU::SCHED_BARRIER) {
@@ -234,6 +376,7 @@ bool SIPostRABundler::run(MachineFunction &MF) {
234376 BundleUsedRegUnits.reset ();
235377 }
236378
379+ reorderLoads (MBB, BundleStart, Next);
237380 finalizeBundle (MBB, BundleStart, Next);
238381 }
239382
0 commit comments