@@ -44,6 +44,7 @@ class SIPostRABundler : public MachineFunctionPass {
4444 }
4545
4646private:
47+ const SIInstrInfo *TII = nullptr ;
4748 const SIRegisterInfo *TRI;
4849
4950 SmallSet<Register, 16 > Defs;
@@ -54,6 +55,9 @@ class SIPostRABundler : public MachineFunctionPass {
5455 bool isBundleCandidate (const MachineInstr &MI) const ;
5556 bool isDependentLoad (const MachineInstr &MI) const ;
5657 bool canBundle (const MachineInstr &MI, const MachineInstr &NextMI) const ;
58+ void reorderLoads (MachineBasicBlock &MBB,
59+ MachineBasicBlock::instr_iterator &BundleStart,
60+ MachineBasicBlock::instr_iterator Next);
5761};
5862
5963constexpr uint64_t MemFlags = SIInstrFlags::MTBUF | SIInstrFlags::MUBUF |
@@ -121,10 +125,132 @@ bool SIPostRABundler::canBundle(const MachineInstr &MI,
121125 !isDependentLoad (NextMI));
122126}
123127
128+ void SIPostRABundler::reorderLoads (
129+ MachineBasicBlock &MBB, MachineBasicBlock::instr_iterator &BundleStart,
130+ MachineBasicBlock::instr_iterator Next) {
131+ auto II = BundleStart;
132+ if (!TII->isMIMG (II->getOpcode ()) || II->mayStore ())
133+ return ;
134+
135+ LLVM_DEBUG (dbgs () << " Begin bundle reorder\n " );
136+
137+ // Collect clause
138+ SmallVector<MachineInstr *> Clause;
139+ for (auto II = BundleStart; II != Next; ++II)
140+ Clause.push_back (&*II);
141+
142+ // Search to find the usage distance of each defined register in the clause.
143+ const int MaxSearch = 100 ;
144+ SmallSet<Register, 16 > DefRegs (Defs);
145+ SmallSet<unsigned , 16 > Distances;
146+ DenseMap<Register, unsigned > UseDistance;
147+ unsigned Dist = 0 ;
148+ for (MachineBasicBlock::iterator SearchI = Next;
149+ SearchI != MBB.end () && Dist < MaxSearch && !DefRegs.empty ();
150+ ++SearchI, ++Dist) {
151+ SmallVector<Register, 4 > Found;
152+ // FIXME: fix search efficiency
153+ for (Register DefReg : DefRegs) {
154+ if (SearchI->readsRegister (DefReg, TRI))
155+ Found.push_back (DefReg);
156+ }
157+ for (Register Reg : Found) {
158+ UseDistance[Reg] = Dist;
159+ DefRegs.erase (Reg);
160+ Distances.insert (Dist);
161+ }
162+ }
163+
164+ if (Distances.size () <= 1 )
165+ return ;
166+
167+ std::vector<std::pair<MachineInstr *, unsigned >> Schedule;
168+ unsigned TotalOrder = Dist + 1 ;
169+ bool Reorder = false ;
170+ for (MachineInstr *MI : Clause) {
171+ unsigned Order = TotalOrder++;
172+ if (MI->getNumExplicitDefs () >= 0 ) {
173+ Register Reg = MI->defs ().begin ()->getReg ();
174+ if (!UseDistance.contains (Reg))
175+ continue ;
176+ Order = std::min (Order, UseDistance[Reg]);
177+ Reorder = true ;
178+ }
179+ LLVM_DEBUG (dbgs () << " Order: " << Order << " , MI: " << *MI);
180+ Schedule.push_back (std::pair (MI, Order));
181+ }
182+
183+ if (!Reorder)
184+ return ;
185+
186+ std::sort (Schedule.begin (), Schedule.end (),
187+ [](std::pair<MachineInstr *, unsigned > A,
188+ std::pair<MachineInstr *, unsigned > B) {
189+ return A.second < B.second ;
190+ });
191+
192+ // Rebuild clause order.
193+ // Schedule holds ideal order for the load operations; however, each def
194+ // can only be scheduled when it will no longer clobber any uses.
195+ Clause.clear ();
196+ while (!Schedule.empty ()) {
197+ auto It = Schedule.begin ();
198+ while (It != Schedule.end ()) {
199+ MachineInstr *MI = It->first ;
200+
201+ LLVM_DEBUG (dbgs () << " Try schedule: " << *MI);
202+
203+ if (MI->getNumExplicitDefs () == 0 ) {
204+ // No defs, always schedule.
205+ Clause.push_back (MI);
206+ break ;
207+ }
208+
209+ // FIXME: make this scan more efficient
210+ Register Reg = MI->defs ().begin ()->getReg ();
211+ bool ClobbersUse = false ;
212+ for (auto SearchIt = Schedule.begin (); SearchIt != Schedule.end ();
213+ ++SearchIt) {
214+ // We are allowed to clobber our own uses.
215+ if (SearchIt == It)
216+ continue ;
217+ if (SearchIt->first ->readsRegister (Reg, TRI)) {
218+ ClobbersUse = true ;
219+ break ;
220+ }
221+ }
222+ if (ClobbersUse) {
223+ // Use is clobbered; try next def in the schedule.
224+ It++;
225+ LLVM_DEBUG (dbgs () << " Clobbers uses\n " );
226+ continue ;
227+ }
228+
229+ // Safe to schedule.
230+ LLVM_DEBUG (dbgs () << " OK!\n " );
231+ Clause.push_back (MI);
232+ break ;
233+ }
234+ assert (It != Schedule.end ());
235+ Schedule.erase (It);
236+ }
237+
238+ // Apply order to instructions.
239+ for (MachineInstr *MI : Clause)
240+ MI->moveBefore (&*Next);
241+
242+ // FIXME: update kill flags
243+
244+ // Update start of bundle.
245+ BundleStart = Clause[0 ]->getIterator ();
246+ }
247+
124248bool SIPostRABundler::runOnMachineFunction (MachineFunction &MF) {
125249 if (skipFunction (MF.getFunction ()))
126250 return false ;
127251
252+ const GCNSubtarget &ST = MF.getSubtarget <GCNSubtarget>();
253+ TII = ST.getInstrInfo ();
128254 TRI = MF.getSubtarget <GCNSubtarget>().getRegisterInfo ();
129255 BitVector BundleUsedRegUnits (TRI->getNumRegUnits ());
130256 BitVector KillUsedRegUnits (TRI->getNumRegUnits ());
@@ -214,6 +340,7 @@ bool SIPostRABundler::runOnMachineFunction(MachineFunction &MF) {
214340 BundleUsedRegUnits.reset ();
215341 }
216342
343+ reorderLoads (MBB, BundleStart, Next);
217344 finalizeBundle (MBB, BundleStart, Next);
218345 }
219346
0 commit comments