@@ -376,6 +376,7 @@ void AIEBaseInstrInfo::adjustTripCount(MachineInstr &MI, int Adjustment) const {
376376 auto &Imm = MI.getOperand (2 );
377377 Imm.setImm (Imm.getImm () + Adjustment);
378378}
379+
379380bool AIEBaseInstrInfo::isHardwareLoopStart (unsigned Opcode) const {
380381 const auto ZOLSupport = getZOLSupport ();
381382 return ZOLSupport && Opcode == ZOLSupport->LoopStartOpcode ;
@@ -397,13 +398,57 @@ bool AIEBaseInstrInfo::isLastZOLSetupBundleInMBB(
397398 return true ;
398399}
399400
400- unsigned AIEBaseInstrInfo::getRegionSize (
401+ // Compute the total size (in bytes) of all instruction bundles in the
402+ // pre-header that follow the last ZOL setup instruction.
403+ unsigned AIEBaseInstrInfo::getPostZOLRegionSize (MachineBasicBlock &MBB) const {
404+ unsigned Size = 0 ;
405+ for (auto &MI : llvm::reverse (MBB)) {
406+ if (MI.isDebugInstr ())
407+ continue ;
408+
409+ if (isZOLSetupBundle (&MI) && isLastZOLSetupBundleInMBB (&MI))
410+ break ;
411+ if (MI.isBundle ()) {
412+ AIE::MachineBundle Bundle = getAIEMachineBundle (MI);
413+ const VLIWFormat *Format = Bundle.getFormatOrNull ();
414+ assert (Format);
415+ Size += Format->getSize ();
416+ }
417+ }
418+ return Size;
419+ }
420+
421+ // Return true if this is ZeroOverhead loop body.
422+ bool AIEBaseInstrInfo::isZOLBody (const MachineBasicBlock &MBB) const {
423+ auto Last = MBB.getLastNonDebugInstr ();
424+
425+ // If MBB is empty or has no non-debug instructions, return false.
426+ if (Last == MBB.end ())
427+ return false ;
428+
429+ return isHardwareLoopEnd (Last->getOpcode ());
430+ }
431+
432+ // Count the number of Machine Bundles in a MachineBasicBlock.
433+ unsigned
434+ AIEBaseInstrInfo::getZOLBundlesCount (const MachineBasicBlock &MBB) const {
435+ if (!isZOLBody (MBB))
436+ return 0 ;
437+
438+ auto First = MBB.getFirstNonDebugInstr ();
439+ auto Last = MBB.getLastNonDebugInstr ();
440+
441+ return std::count_if (
442+ First, Last, [](const MachineInstr &MI) { return !MI.isDebugInstr (); });
443+ }
444+
445+ unsigned AIEBaseInstrInfo::getRegionSizeInBytes (
401446 llvm::iterator_range<MachineBasicBlock::iterator> Region) const {
402447 unsigned Size = 0 ;
403448 LLVM_DEBUG (dbgs () << " ---Region Begin---\n " );
404- for (auto it = Region.begin (), end = Region.end (); it != end ; ++it ) {
405- if (it ->isBundle ()) {
406- AIE::MachineBundle Bundle = getAIEMachineBundle (it );
449+ for (auto It = Region.begin (), End = Region.end (); It != End ; ++It ) {
450+ if (It ->isBundle ()) {
451+ AIE::MachineBundle Bundle = getAIEMachineBundle (It );
407452 const VLIWFormat *Format = Bundle.getFormatOrNull ();
408453 assert (Format);
409454 Size += Format->getSize ();
@@ -1082,17 +1127,75 @@ const PacketFormats &AIEBaseInstrInfo::getPacketFormats() const {
10821127std::vector<MachineBasicBlock::iterator>
10831128AIEBaseInstrInfo::getAlignmentBoundaries (MachineBasicBlock &MBB) const {
10841129 std::vector<MachineBasicBlock::iterator> AlgnCandidates;
1085- unsigned DelaySlot = 0 ;
10861130
1131+ unsigned DelaySlot = 0 ;
10871132 // LoopSetupDistance will be set to number of instructions (7). In
10881133 // PostRAScheduler, this is enforced by setting the exit latency in the
1089- // schduler dag mutator
1134+ // scheduler dag mutator.
10901135 unsigned LoopSetupDistance = 0 ;
1136+ unsigned ZOLBundlesCount = 0 ;
10911137 bool IsCall = false ;
10921138 auto ZOLSupport = getZOLSupport ();
1139+
1140+ const bool IsZOLBody = isZOLBody (MBB);
1141+ if (IsZOLBody) {
1142+ assert (ZOLSupport);
1143+ auto LoopSizeExcludingLastBundle = [&](MachineBasicBlock &MBB) -> unsigned {
1144+ if (MBB.empty ())
1145+ return 0 ;
1146+
1147+ auto It = MBB.getLastNonDebugInstr ();
1148+ if (It == MBB.begin ())
1149+ return 0 ;
1150+ // Step before the PseudoLoopEnd.
1151+ --It;
1152+ while (It != MBB.begin ()) {
1153+ if (It->isBundle ())
1154+ return getRegionSizeInBytes (llvm::make_range (MBB.begin (), It));
1155+ --It;
1156+ }
1157+ return 0 ;
1158+ };
1159+
1160+ auto getPostZOLSetupRegionSize =
1161+ [&](MachineBasicBlock &LoopMBB) -> unsigned {
1162+ for (auto *Pred : LoopMBB.predecessors ()) {
1163+ if (Pred == &LoopMBB)
1164+ continue ;
1165+
1166+ const unsigned Size = getPostZOLRegionSize (*Pred);
1167+ if (Size > 0 )
1168+ return Size;
1169+ }
1170+ return 0 ;
1171+ };
1172+ const unsigned ZOLSetupToLoopEndDist = ZOLSupport->LoopSetupDistance ;
1173+ // Exclude the LoopEnd bundle as it must be placed in its own standalone
1174+ // region to guarantee 128-bit instruction alignment. Additionally, there
1175+ // must be a 112-byte gap (in PM address space) between writing to the ls,
1176+ // le, and lc registers and the LoopEnd instruction.
1177+ ZOLBundlesCount = getZOLBundlesCount (MBB) - 1 ;
1178+ if (ZOLBundlesCount < ZOLSetupToLoopEndDist)
1179+ LoopSetupDistance = ZOLBundlesCount;
1180+ else {
1181+ // Elongate the ZOL loop body only if the distance from the end of the
1182+ // ZOL setup instruction to the last bundle in the loop (excluding the
1183+ // final bundle) is less than 112 bytes.
1184+ const unsigned LoopSetupSizeInBytes = 16 * ZOLSetupToLoopEndDist;
1185+ const unsigned LoopSize = LoopSizeExcludingLastBundle (MBB);
1186+ if (LoopSize >= LoopSetupSizeInBytes)
1187+ LoopSetupDistance = 0 ;
1188+ else {
1189+ const unsigned PostZOLRegionSize = getPostZOLSetupRegionSize (MBB);
1190+ const bool DistanceConstraintMet =
1191+ (LoopSize + PostZOLRegionSize) >= LoopSetupSizeInBytes;
1192+ LoopSetupDistance = DistanceConstraintMet ? 0 : ZOLSetupToLoopEndDist;
1193+ }
1194+ }
1195+ }
10931196 for (auto MI = MBB.begin (), End = MBB.end (); MI != End; ++MI) {
10941197 if (MI->isBundle ()) {
1095- // Return Address Candidate
1198+ // Return Address Candidate.
10961199 IsCall = isCallBundle (MI);
10971200 if (IsCall && DelaySlot > 0 )
10981201 llvm_unreachable (" Cannot have branch in branch delay slot!\n " );
@@ -1118,8 +1221,15 @@ AIEBaseInstrInfo::getAlignmentBoundaries(MachineBasicBlock &MBB) const {
11181221 // Distance in terms of fully-expanded 128-bit bundles that
11191222 // loop setup should maintain. We force each of these bundles to an
11201223 // alignment boundary, so that they will occupy 16 bytes.
1121- if (ZOLSupport && isZOLSetupBundle (MI) && isLastZOLSetupBundleInMBB (MI))
1122- LoopSetupDistance = ZOLSupport->LoopSetupDistance ;
1224+ if (ZOLSupport && isZOLSetupBundle (MI) && isLastZOLSetupBundleInMBB (MI)) {
1225+ // If there is only one successor MBB, it must be the loop.
1226+ if (MBB.succ_size () == 1 ) {
1227+ const MachineBasicBlock *LoopSucc = *MBB.successors ().begin ();
1228+ ZOLBundlesCount = getZOLBundlesCount (*LoopSucc) - 1 ;
1229+ }
1230+ if (ZOLBundlesCount < ZOLSupport->LoopSetupDistance )
1231+ LoopSetupDistance = ZOLSupport->LoopSetupDistance - ZOLBundlesCount;
1232+ }
11231233 } else if (isHardwareLoopEnd (MI->getOpcode ())) {
11241234 if (DelaySlot > 0 )
11251235 llvm_unreachable (" Cannot have HWLoopEnd in branch delay slot!\n " );
@@ -1128,7 +1238,7 @@ AIEBaseInstrInfo::getAlignmentBoundaries(MachineBasicBlock &MBB) const {
11281238 AlgnCandidates.emplace_back (std::prev (MI));
11291239 } else if (!MI->isMetaInstruction ()) {
11301240 // single instruction, there should not be any
1131- // after Bundle Finalization Pass
1241+ // after Bundle Finalization Pass.
11321242 llvm_unreachable (" Found an un-expected standalone instruction !" );
11331243 }
11341244 }
0 commit comments