44// See https://llvm.org/LICENSE.txt for license information.
55// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
66//
7- // (c) Copyright 2023-2024 Advanced Micro Devices, Inc. or its affiliates
7+ // (c) Copyright 2023-2025 Advanced Micro Devices, Inc. or its affiliates
88//
99// ===----------------------------------------------------------------------===//
1010//
4545// ===----------------------------------------------------------------------===//
4646
4747#include " AIE.h"
48+ #include " AIEBaseInstrInfo.h"
49+ #include " Utils/AIELoopUtils.h"
4850#include " llvm/CodeGen/GlobalISel/CSEInfo.h"
4951#include " llvm/CodeGen/GlobalISel/CSEMIRBuilder.h"
5052#include " llvm/CodeGen/GlobalISel/MachineIRBuilder.h"
5153#include " llvm/CodeGen/MachineFunction.h"
5254#include " llvm/CodeGen/MachineFunctionPass.h"
5355#include " llvm/CodeGen/MachineInstrBuilder.h"
56+ #include " llvm/CodeGen/MachineLoopInfo.h"
5457#include " llvm/CodeGen/MachineModuleInfo.h"
5558#include " llvm/CodeGen/TargetPassConfig.h"
5659#include " llvm/InitializePasses.h"
@@ -72,6 +75,11 @@ static cl::opt<bool> EnableChainsForVectorLdSt(
7275 " aie-chain-addr-vec-ldst" , cl::Hidden, cl::init(true ),
7376 cl::desc(" Enable ptradd chaining for vector loads and stores." ));
7477
78+ cl::opt<int > AddressChainCostLimit (
79+ " aie-chain-cost-limit" ,
80+ cl::desc (" Maximum allowed cost for pointer add chains" ), cl::init(-1 ),
81+ cl::Hidden);
82+
7583namespace {
7684
7785// / Try and re-order PTR_ADD instructions to maximise the size of constant
@@ -163,6 +171,8 @@ class AIEClusterBaseAddress : public MachineFunctionPass {
163171 void getAnalysisUsage (AnalysisUsage &AU) const override {
164172 AU.addRequired <MachineModuleInfoWrapperPass>();
165173 AU.addRequired <GISelCSEAnalysisWrapperPass>();
174+ AU.addRequired <MachineLoopInfo>();
175+ AU.addPreserved <MachineLoopInfo>();
166176 AU.addRequired <TargetPassConfig>();
167177 AU.setPreservesAll ();
168178 }
@@ -223,10 +233,123 @@ class AIEClusterBaseAddress : public MachineFunctionPass {
223233 if (Instrs.size () <= 1 )
224234 return true ;
225235
226- // If the base reg is used in any of the successive MBBs, then we don't
227- // want to chain the corresponding ptr adds, since this would introduce a
228- // COPY and increase reg pressure.
229- return isRegUsedInSuccessiveMBBs (&MBB, PtrReg);
236+ // If the base reg is used in any of the successive MBBs, chaining would
237+ // introduce a COPY and increase reg pressure. We only skip chaining in this
238+ // case if it is considered unprofitable.
239+ if (isRegUsedInSuccessiveMBBs (&MBB, PtrReg) &&
240+ !isChainingProfitable (PtrReg, Instrs, MBB))
241+ return true ;
242+
243+ return false ;
244+ }
245+
246+ // Decide heuristically if chaining will be profitable
247+ bool isChainingProfitable (Register PtrReg,
248+ const SmallVector<MachineInstr *, 8 > &Instrs,
249+ MachineBasicBlock &MBB) {
250+ const TargetSubtargetInfo &ST = MBB.getParent ()->getSubtarget ();
251+ const AIEBaseInstrInfo *TII =
252+ static_cast <const AIEBaseInstrInfo *>(ST.getInstrInfo ());
253+ using OffsetType = std::variant<int64_t , std::string>;
254+ assert (Instrs.size () > 1 );
255+
256+ bool InLoop = true ;
257+ MachineLoopInfo &MLI = getAnalysis<MachineLoopInfo>();
258+ MachineLoop *ToLoop = MLI.getLoopFor (&MBB);
259+ if (!ToLoop)
260+ InLoop = false ;
261+
262+ unsigned ChainedCost = 0 ;
263+ unsigned ChainedCostLimit = Instrs.size () / 2 ; // Experimental threshold
264+
265+ if (AddressChainCostLimit > -1 ) {
266+ ChainedCostLimit = AddressChainCostLimit;
267+ }
268+
269+ if (isRegUsedInSuccessiveMBBs (&MBB, PtrReg)) {
270+ if (InLoop)
271+ return false ; // A copy in a loop is costly
272+ ChainedCost += 1 ; // Add cost of resulting copy
273+ }
274+
275+ int64_t ImmediateRangeMax = 0 ;
276+ int64_t ImmediateRangeMin = 0 ;
277+ bool ImmediateRangeSet = false ;
278+ int64_t AccumulatedOffset = 0 ;
279+ int64_t NewOffset;
280+ SmallVector<OffsetType, 8 > Offsets;
281+
282+ for (unsigned I = 0 ; I < Instrs.size () - 1 ; I++) {
283+ MachineInstr *MI = Instrs[I];
284+ MachineInstr *MINext = Instrs[I + 1 ];
285+
286+ const Register PtrReg = MI->getOperand (0 ).getReg ();
287+ for (const MachineInstr &UseMI : MRI->use_instructions (PtrReg)) {
288+ if (ImmediateRangeSet)
289+ continue ; // Check first use only
290+ if (!UseMI.mayLoadOrStore ())
291+ continue ;
292+ const LLT MemType = getLoadStoreType (UseMI);
293+ // Immediate ranges for vectors are sufficient so we
294+ // assume chaining is always profitable.
295+ if (MemType.isVector ()) {
296+ return true ;
297+ } else {
298+ if (MemType.getSizeInBits () <= 32 ) {
299+ ImmediateRangeMax = TII->getLoadStorePostIncImmediateRange (MemType)
300+ .ImmediateRangeMax ;
301+ ImmediateRangeMin = TII->getLoadStorePostIncImmediateRange (MemType)
302+ .ImmediateRangeMin ;
303+ ImmediateRangeSet = true ;
304+ } else {
305+ llvm_unreachable (
306+ " unreachable: Unsupported immediate range of scalar size " );
307+ }
308+ }
309+ }
310+
311+ // If the immediate range is not set, the pointers aren't used by any
312+ // loads and stores, so we return.
313+ if (!ImmediateRangeSet) {
314+ assert (ImmediateRangeMin == 0 && ImmediateRangeMax == 0 );
315+ return false ;
316+ }
317+
318+ auto OffsetMI =
319+ getIConstantVRegValWithLookThrough (MI->getOperand (2 ).getReg (), *MRI);
320+ auto OffsetMINext = getIConstantVRegValWithLookThrough (
321+ MINext->getOperand (2 ).getReg (), *MRI);
322+
323+ if (shouldBreakChain (MI, MINext, OffsetMI, OffsetMINext)) {
324+ ChainedCost++;
325+ AccumulatedOffset = 0 ;
326+ Offsets.push_back (" Break" );
327+ continue ;
328+ }
329+
330+ const int64_t CurrOffset = OffsetMI->Value .getSExtValue ();
331+ const int64_t NextOffset = OffsetMINext->Value .getSExtValue ();
332+
333+ assert (I == 0 || !Offsets.empty ());
334+ AccumulatedOffset +=
335+ (I == 0 || (std::holds_alternative<std::string>(Offsets.back ()) &&
336+ std::get<std::string>(Offsets.back ()) == " Break" ))
337+ ? CurrOffset
338+ : NewOffset;
339+ Offsets.push_back (
340+ (I == 0 || (std::holds_alternative<std::string>(Offsets.back ()) &&
341+ std::get<std::string>(Offsets.back ()) == " Break" ))
342+ ? CurrOffset
343+ : OffsetType (NewOffset));
344+
345+ NewOffset = NextOffset - AccumulatedOffset;
346+
347+ if (NewOffset < ImmediateRangeMin || NewOffset > ImmediateRangeMax) {
348+ ChainedCost += 1 ; // Immediate materialization cost
349+ }
350+ }
351+
352+ return ChainedCostLimit > ChainedCost;
230353 }
231354
232355 // Build a chain (or set of chains) of G_PTR_ADDs. We consider as
0 commit comments