Skip to content

Commit b1ac573

Browse files
Abnikant Singh (abnikant)
authored and committed
[AIE] Improve Zero-overhead loop 112-bytes padding
1 parent 97fff7e commit b1ac573

Some content is hidden

Large Commits have some content hidden by default. Use the searchbox below for content that may be hidden.

61 files changed

+671
-857
lines changed

llvm/lib/Target/AIE/AIEBaseInstrInfo.cpp

Lines changed: 120 additions & 10 deletions
Original file line numberDiff line numberDiff line change
@@ -376,6 +376,7 @@ void AIEBaseInstrInfo::adjustTripCount(MachineInstr &MI, int Adjustment) const {
376376
auto &Imm = MI.getOperand(2);
377377
Imm.setImm(Imm.getImm() + Adjustment);
378378
}
379+
379380
bool AIEBaseInstrInfo::isHardwareLoopStart(unsigned Opcode) const {
380381
const auto ZOLSupport = getZOLSupport();
381382
return ZOLSupport && Opcode == ZOLSupport->LoopStartOpcode;
@@ -397,13 +398,57 @@ bool AIEBaseInstrInfo::isLastZOLSetupBundleInMBB(
397398
return true;
398399
}
399400

400-
unsigned AIEBaseInstrInfo::getRegionSize(
401+
// Compute the total size (in bytes) of all instruction bundles in MBB (the
402+
// loop pre-header) that follow the last ZOL setup bundle. If MBB contains no
// such setup bundle, the scan never stops early and the sizes of all bundles
// in MBB are summed.
403+
unsigned AIEBaseInstrInfo::getPostZOLRegionSize(MachineBasicBlock &MBB) const {
404+
unsigned Size = 0;
405+
for (auto &MI : llvm::reverse(MBB)) { // Scan from the end of the block.
406+
if (MI.isDebugInstr())
407+
continue;
408+
409+
if (isZOLSetupBundle(&MI) && isLastZOLSetupBundleInMBB(&MI))
410+
break; // Everything after the last setup bundle has been accumulated.
411+
if (MI.isBundle()) {
412+
AIE::MachineBundle Bundle = getAIEMachineBundle(MI);
413+
const VLIWFormat *Format = Bundle.getFormatOrNull();
414+
assert(Format);
415+
Size += Format->getSize();
416+
}
417+
}
418+
return Size;
419+
}
420+
421+
// Return true if MBB is a zero-overhead loop (ZOL) body, i.e. its last
// non-debug instruction is a hardware loop end.
422+
bool AIEBaseInstrInfo::isZOLBody(const MachineBasicBlock &MBB) const {
423+
auto Last = MBB.getLastNonDebugInstr();
424+
425+
// If MBB is empty or has no non-debug instructions, return false.
426+
if (Last == MBB.end())
427+
return false;
428+
429+
return isHardwareLoopEnd(Last->getOpcode());
430+
}
431+
432+
// Count the non-debug entries of a ZOL body MBB in the half-open range
// [first non-debug, last non-debug), so the terminating hardware loop end
// itself is not counted. Returns 0 when MBB is not a ZOL body.
// NOTE(review): callers compute `getZOLBundlesCount(...) - 1` in unsigned
// arithmetic; a 0 result wraps around there -- confirm this is intended.
433+
unsigned
434+
AIEBaseInstrInfo::getZOLBundlesCount(const MachineBasicBlock &MBB) const {
435+
if (!isZOLBody(MBB))
436+
return 0;
437+
438+
auto First = MBB.getFirstNonDebugInstr();
439+
auto Last = MBB.getLastNonDebugInstr();
440+
441+
return std::count_if(
442+
First, Last, [](const MachineInstr &MI) { return !MI.isDebugInstr(); });
443+
}
444+
445+
unsigned AIEBaseInstrInfo::getRegionSizeInBytes(
401446
llvm::iterator_range<MachineBasicBlock::iterator> Region) const {
402447
unsigned Size = 0;
403448
LLVM_DEBUG(dbgs() << "---Region Begin---\n");
404-
for (auto it = Region.begin(), end = Region.end(); it != end; ++it) {
405-
if (it->isBundle()) {
406-
AIE::MachineBundle Bundle = getAIEMachineBundle(it);
449+
for (auto It = Region.begin(), End = Region.end(); It != End; ++It) {
450+
if (It->isBundle()) {
451+
AIE::MachineBundle Bundle = getAIEMachineBundle(It);
407452
const VLIWFormat *Format = Bundle.getFormatOrNull();
408453
assert(Format);
409454
Size += Format->getSize();
@@ -1082,17 +1127,75 @@ const PacketFormats &AIEBaseInstrInfo::getPacketFormats() const {
10821127
std::vector<MachineBasicBlock::iterator>
10831128
AIEBaseInstrInfo::getAlignmentBoundaries(MachineBasicBlock &MBB) const {
10841129
std::vector<MachineBasicBlock::iterator> AlgnCandidates;
1085-
unsigned DelaySlot = 0;
10861130

1131+
unsigned DelaySlot = 0;
10871132
// LoopSetupDistance will be set to number of instructions (7). In
10881133
// PostRAScheduler, this is enforced by setting the exit latency in the
1089-
// schduler dag mutator
1134+
// scheduler dag mutator.
10901135
unsigned LoopSetupDistance = 0;
1136+
unsigned ZOLBundlesCount = 0;
10911137
bool IsCall = false;
10921138
auto ZOLSupport = getZOLSupport();
1139+
1140+
const bool IsZOLBody = isZOLBody(MBB);
1141+
if (IsZOLBody) {
1142+
assert(ZOLSupport);
1143+
auto LoopSizeExcludingLastBundle = [&](MachineBasicBlock &MBB) -> unsigned {
1144+
if (MBB.empty())
1145+
return 0;
1146+
1147+
auto It = MBB.getLastNonDebugInstr();
1148+
if (It == MBB.begin())
1149+
return 0;
1150+
// Step before the PseudoLoopEnd.
1151+
--It;
1152+
while (It != MBB.begin()) {
1153+
if (It->isBundle())
1154+
return getRegionSizeInBytes(llvm::make_range(MBB.begin(), It));
1155+
--It;
1156+
}
1157+
return 0;
1158+
};
1159+
1160+
auto getPostZOLSetupRegionSize =
1161+
[&](MachineBasicBlock &LoopMBB) -> unsigned {
1162+
for (auto *Pred : LoopMBB.predecessors()) {
1163+
if (Pred == &LoopMBB)
1164+
continue;
1165+
1166+
const unsigned Size = getPostZOLRegionSize(*Pred);
1167+
if (Size > 0)
1168+
return Size;
1169+
}
1170+
return 0;
1171+
};
1172+
const unsigned ZOLSetupToLoopEndDist = ZOLSupport->LoopSetupDistance;
1173+
// Exclude the LoopEnd bundle as it must be placed in its own standalone
1174+
// region to guarantee 128-bit instruction alignment. Additionally, there
1175+
// must be a 112-byte gap (in PM address space) between writing to the ls,
1176+
// le, and lc registers and the LoopEnd instruction.
1177+
ZOLBundlesCount = getZOLBundlesCount(MBB) - 1;
1178+
if (ZOLBundlesCount < ZOLSetupToLoopEndDist)
1179+
LoopSetupDistance = ZOLBundlesCount;
1180+
else {
1181+
// Elongate the ZOL loop body only if the distance from the end of the
1182+
// ZOL setup instruction to the last bundle in the loop (excluding the
1183+
// final bundle) is less than 112 bytes.
1184+
const unsigned LoopSetupSizeInBytes = 16 * ZOLSetupToLoopEndDist;
1185+
const unsigned LoopSize = LoopSizeExcludingLastBundle(MBB);
1186+
if (LoopSize >= LoopSetupSizeInBytes)
1187+
LoopSetupDistance = 0;
1188+
else {
1189+
const unsigned PostZOLRegionSize = getPostZOLSetupRegionSize(MBB);
1190+
const bool DistanceConstraintMet =
1191+
(LoopSize + PostZOLRegionSize) >= LoopSetupSizeInBytes;
1192+
LoopSetupDistance = DistanceConstraintMet ? 0 : ZOLSetupToLoopEndDist;
1193+
}
1194+
}
1195+
}
10931196
for (auto MI = MBB.begin(), End = MBB.end(); MI != End; ++MI) {
10941197
if (MI->isBundle()) {
1095-
// Return Address Candidate
1198+
// Return Address Candidate.
10961199
IsCall = isCallBundle(MI);
10971200
if (IsCall && DelaySlot > 0)
10981201
llvm_unreachable("Cannot have branch in branch delay slot!\n");
@@ -1118,8 +1221,15 @@ AIEBaseInstrInfo::getAlignmentBoundaries(MachineBasicBlock &MBB) const {
11181221
// Distance in terms of fully-expanded 128-bit bundles that
11191222
// loop setup should maintain. We force each of these bundles to an
11201223
// alignment boundary, so that they will occupy 16 bytes.
1121-
if (ZOLSupport && isZOLSetupBundle(MI) && isLastZOLSetupBundleInMBB(MI))
1122-
LoopSetupDistance = ZOLSupport->LoopSetupDistance;
1224+
if (ZOLSupport && isZOLSetupBundle(MI) && isLastZOLSetupBundleInMBB(MI)) {
1225+
// If there is only one successor MBB, it must be the loop.
1226+
if (MBB.succ_size() == 1) {
1227+
const MachineBasicBlock *LoopSucc = *MBB.successors().begin();
1228+
ZOLBundlesCount = getZOLBundlesCount(*LoopSucc) - 1;
1229+
}
1230+
if (ZOLBundlesCount < ZOLSupport->LoopSetupDistance)
1231+
LoopSetupDistance = ZOLSupport->LoopSetupDistance - ZOLBundlesCount;
1232+
}
11231233
} else if (isHardwareLoopEnd(MI->getOpcode())) {
11241234
if (DelaySlot > 0)
11251235
llvm_unreachable("Cannot have HWLoopEnd in branch delay slot!\n");
@@ -1128,7 +1238,7 @@ AIEBaseInstrInfo::getAlignmentBoundaries(MachineBasicBlock &MBB) const {
11281238
AlgnCandidates.emplace_back(std::prev(MI));
11291239
} else if (!MI->isMetaInstruction()) {
11301240
// single instruction, there should not be any
1131-
// after Bundle Finalization Pass
1241+
// after Bundle Finalization Pass.
11321242
llvm_unreachable("Found an un-expected standalone instruction !");
11331243
}
11341244
}

llvm/lib/Target/AIE/AIEBaseInstrInfo.h

Lines changed: 9 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -16,6 +16,7 @@
1616
#define LLVM_LIB_TARGET_AIE_AIEBASEINSTRRINFO_H
1717

1818
#include "AIE.h"
19+
#include "AIEBundle.h"
1920
#include "AIEMIRFormatter.h"
2021
#include "AIETiedRegOperands.h"
2122
#include "MCTargetDesc/AIEFormat.h"
@@ -35,7 +36,6 @@ template <class I> class Bundle;
3536

3637
struct AIEBaseInstrInfo : public TargetInstrInfo {
3738
using TargetInstrInfo::TargetInstrInfo;
38-
3939
// This codifies the model of ZeroOverheadLoops
4040
class ZOLSupport {
4141
public:
@@ -317,6 +317,12 @@ struct AIEBaseInstrInfo : public TargetInstrInfo {
317317
// registers(lc, le, ls, etc.) and the end of the loop,
318318
virtual unsigned getLoopSetupDistance() const;
319319

320+
virtual unsigned getZOLBundlesCount(const MachineBasicBlock &MBB) const;
321+
322+
virtual unsigned getPostZOLRegionSize(MachineBasicBlock &MBB) const;
323+
324+
virtual bool isZOLBody(const MachineBasicBlock &MBB) const;
325+
320326
// Return the vector of Alignment Region Boundaries.
321327
virtual std::vector<MachineBasicBlock::iterator>
322328
getAlignmentBoundaries(MachineBasicBlock &MBB) const;
@@ -441,8 +447,8 @@ struct AIEBaseInstrInfo : public TargetInstrInfo {
441447
bool isLastZOLSetupBundleInMBB(MachineBasicBlock::iterator MII) const;
442448
virtual const AIE::Bundle<MachineInstr>
443449
getAIEMachineBundle(MachineBasicBlock::iterator MII) const;
444-
virtual unsigned
445-
getRegionSize(llvm::iterator_range<MachineBasicBlock::iterator> Region) const;
450+
virtual unsigned getRegionSizeInBytes(
451+
llvm::iterator_range<MachineBasicBlock::iterator> Region) const;
446452

447453
/// Central place to compute RAW/WAR/WAW operand latencies.
448454
/// This uses itineraries when they exist. It returns std::nullopt for

llvm/lib/Target/AIE/AIEBaseSubtarget.cpp

Lines changed: 13 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -260,6 +260,8 @@ class RegionEndEdges : public ScheduleDAGMutation {
260260
}
261261
void apply(ScheduleDAGInstrs *DAG) override {
262262
AIE::MaxLatencyFinder MaxLatency(DAG);
263+
MachineBasicBlock *PrologueMBB = DAG->getBB();
264+
unsigned int ZOLBundlesCount = 0;
263265

264266
// Default edges to ExitSU are conservative, and can't be shrunk.
265267
// We really should know what we're doing here, so just remove and
@@ -296,9 +298,18 @@ class RegionEndEdges : public ScheduleDAGMutation {
296298
if (TII->isZeroOverheadLoopSetupInstr(MI)) {
297299
auto ZOLSupport = TII->getZOLSupport();
298300
assert(ZOLSupport);
299-
EdgeLatency = std::max(EdgeLatency, ZOLSupport->LoopSetupDistance + 1);
301+
if (PrologueMBB && PrologueMBB->succ_size() == 1) {
302+
// If there is only one successor MBB, it must be the loop.
303+
MachineBasicBlock *LoopSucc = *PrologueMBB->successors().begin();
304+
// Exclude the LoopEnd bundle since it must reside in its own
305+
// standalone region to ensure it points to a 128-bit aligned
306+
// instruction.
307+
ZOLBundlesCount = TII->getZOLBundlesCount(*LoopSucc) - 1;
308+
}
309+
if (ZOLBundlesCount < ZOLSupport->LoopSetupDistance)
310+
EdgeLatency = std::max(EdgeLatency, ZOLSupport->LoopSetupDistance +
311+
1 - ZOLBundlesCount);
300312
}
301-
302313
ExitDep.setLatency(EdgeLatency);
303314
DAG->ExitSU.addPred(ExitDep, /*Required=*/true);
304315
}

llvm/lib/Target/AIE/AIEBaseSubtarget.h

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -18,6 +18,7 @@
1818
#include "AIEBaseAddrSpaceInfo.h"
1919
#include "AIEBaseInstrInfo.h"
2020
#include "Utils/AIEBaseInfo.h"
21+
#include "llvm/CodeGen/MachineScheduler.h"
2122
#include "llvm/CodeGen/ScheduleDAGMutation.h"
2223
#include "llvm/CodeGenTypes/MachineValueType.h"
2324
#include "llvm/MC/MCInstrItineraries.h"

llvm/lib/Target/AIE/AIEMachineAlignment.cpp

Lines changed: 6 additions & 5 deletions
Original file line numberDiff line numberDiff line change
@@ -150,7 +150,7 @@ void AIEMachineAlignment::applyBundlesAlignment(
150150
for (auto Region : Regions) {
151151
unsigned Size = 0;
152152
unsigned PadBytes = 0;
153-
Size = TII->getRegionSize(Region);
153+
Size = TII->getRegionSizeInBytes(Region);
154154
if ((Size % 16) == 0)
155155
continue;
156156
PadBytes = 16 - (Size % 16);
@@ -167,8 +167,8 @@ void AIEMachineAlignment::applyBundlesAlignment(
167167

168168
// Find Regions for Alignment Candidate e.g. Region ending with Return Address,
169169
// End of BB, etc.
170-
static std::vector<llvm::iterator_range<MachineBasicBlock::iterator>>
171-
findRegions(MachineBasicBlock &MBB) {
170+
std::vector<llvm::iterator_range<MachineBasicBlock::iterator>>
171+
AIEMachineAlignment::findRegions(MachineBasicBlock &MBB) {
172172
auto *TII = static_cast<const AIEBaseInstrInfo *>(
173173
MBB.getParent()->getSubtarget().getInstrInfo());
174174
MachineBasicBlock::iterator RegionBegin = MBB.begin();
@@ -192,11 +192,12 @@ bool AIEMachineAlignment::runOnMachineFunction(MachineFunction &MF) {
192192
// ordering of the blocks within the function.
193193
MF.RenumberBlocks();
194194

195+
auto *TII =
196+
static_cast<const AIEBaseInstrInfo *>(MF.getSubtarget().getInstrInfo());
197+
195198
for (auto &MBB : MF) {
196199
std::vector<llvm::iterator_range<MachineBasicBlock::iterator>> Regions =
197200
findRegions(MBB);
198-
auto *TII = static_cast<const AIEBaseInstrInfo *>(
199-
MBB.getParent()->getSubtarget().getInstrInfo());
200201
applyBundlesAlignment(Regions, TII);
201202
// Clean up BB local Regions
202203
Regions.clear();

llvm/lib/Target/AIE/AIEMachineAlignment.h

Lines changed: 4 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -18,14 +18,17 @@
1818
namespace llvm {
1919

2020
class AIEMachineAlignment : public llvm::MachineFunctionPass {
21-
2221
public:
2322
static char ID;
2423
AIEMachineAlignment() : MachineFunctionPass(ID) {}
2524
llvm::StringRef getPassName() const override {
2625
return "AIE Machine Alignment";
2726
}
2827
bool runOnMachineFunction(llvm::MachineFunction &MF) override;
28+
29+
std::vector<llvm::iterator_range<MachineBasicBlock::iterator>>
30+
findRegions(MachineBasicBlock &MBB);
31+
2932
void applyBundlesAlignment(
3033
const std::vector<llvm::iterator_range<MachineBasicBlock::iterator>>
3134
&Regions,

0 commit comments

Comments
 (0)