@@ -85,6 +85,11 @@ static cl::opt<unsigned>
     BDisplacementBits("aarch64-b-offset-bits", cl::Hidden, cl::init(26),
                       cl::desc("Restrict range of B instructions (DEBUG)"));
 
+static cl::opt<unsigned> GatherOptSearchLimit(
+    "aarch64-search-limit", cl::Hidden, cl::init(2048),
+    cl::desc("Restrict range of instructions to search for the "
+             "machine-combiner gather pattern optimization"));
+
 AArch64InstrInfo::AArch64InstrInfo(const AArch64Subtarget &STI)
     : AArch64GenInstrInfo(AArch64::ADJCALLSTACKDOWN, AArch64::ADJCALLSTACKUP,
                           AArch64::CATCHRET),
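Note: because `GatherOptSearchLimit` is an ordinary `cl::opt`, the 2048-instruction default can be overridden when experimenting, e.g. `llc -aarch64-search-limit=512` directly, or `-mllvm -aarch64-search-limit=512` when driving the backend through clang.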
@@ -7414,39 +7419,6 @@ static bool getMiscPatterns(MachineInstr &Root,
   return false;
 }
 
-/// Collect all loads, stores and calls between `First` and `Last`.
-/// `First` and `Last` must be within the same MBB.
-static void getMemOps(const MachineInstr *First, const MachineInstr *Last,
-                      SmallVectorImpl<const MachineInstr *> &MemOps) {
-  if (!First || !Last || First == Last)
-    return;
-
-  // Both instructions must be in the same basic block.
-  if (First->getParent() != Last->getParent())
-    return;
-
-  const MachineBasicBlock *MBB = First->getParent();
-  auto InstrIt = First->getIterator();
-  auto LastIt = Last->getIterator();
-  MemOps.push_back(First);
-
-  for (; InstrIt != MBB->end(); ++InstrIt) {
-    if (InstrIt == LastIt)
-      break;
-
-    // Check for stores or calls that could interfere
-    if (InstrIt->mayLoadOrStore() || InstrIt->isCall())
-      MemOps.push_back(&*InstrIt);
-  }
-
-  // If we have not iterated to `Last` the instructions must have not been
-  // ordered correctly.
-  assert(&*InstrIt == Last &&
-         "Got bad machine instructions, First should come before Last!");
-
-  MemOps.push_back(Last);
-}
-
 /// Check if a given MachineInstr `MIa` may alias with any of the instructions
 /// in `MemInstrs`.
 static bool mayAlias(const MachineInstr &MIa,
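Note: `mayAlias` here is the pre-existing helper in this file; this patch only changes its callers. For readers without the full file: a helper of this shape can simply defer to `MachineInstr::mayAlias`, which answers conservatively when handed a null `AliasAnalysis` pointer (hence the `nullptr` argument at the call site below). The following is a sketch of that shape, not the file's actual body; the name `mayAliasSketch` and the `UseTBAA` choice are assumptions.

    #include "llvm/ADT/ArrayRef.h"
    #include "llvm/Analysis/AliasAnalysis.h"
    #include "llvm/CodeGen/MachineInstr.h"

    using namespace llvm;

    // Sketch: returns true if MIa may alias any instruction in MemInstrs.
    // With AA == nullptr, MachineInstr::mayAlias falls back to a
    // conservative, memory-operand-based answer.
    static bool mayAliasSketch(const MachineInstr &MIa,
                               ArrayRef<const MachineInstr *> MemInstrs,
                               AAResults *AA) {
      for (const MachineInstr *MIb : MemInstrs)
        if (MIa.mayAlias(AA, *MIb, /*UseTBAA=*/false))
          return true;
      return false;
    }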
@@ -7505,13 +7477,13 @@ static bool getGatherPattern(MachineInstr &Root,
   auto *CurrInstr = MRI.getUniqueVRegDef(Root.getOperand(1).getReg());
   auto Range = llvm::seq<unsigned>(1, NumLanes - 1);
   SmallSet<unsigned, 16> RemainingLanes(Range.begin(), Range.end());
-  SmallSet<const MachineInstr *, 16> LoadInstrs = {};
+  SmallVector<const MachineInstr *, 16> LoadInstrs = {};
   while (!RemainingLanes.empty() && CurrInstr &&
          CurrInstr->getOpcode() == LoadLaneOpCode &&
          MRI.hasOneNonDBGUse(CurrInstr->getOperand(0).getReg()) &&
          CurrInstr->getNumOperands() == 4) {
     RemainingLanes.erase(CurrInstr->getOperand(2).getImm());
-    LoadInstrs.insert(CurrInstr);
+    LoadInstrs.push_back(CurrInstr);
     CurrInstr = MRI.getUniqueVRegDef(CurrInstr->getOperand(1).getReg());
   }
 
@@ -7533,38 +7505,38 @@ static bool getGatherPattern(MachineInstr &Root,
   if (!MRI.hasOneNonDBGUse(Lane0LoadReg))
     return false;
 
-  LoadInstrs.insert(MRI.getUniqueVRegDef(Lane0LoadReg));
-
-  // Check for intervening stores or calls between the first and last load.
-  // Sort load instructions by program order.
-  SmallVector<const MachineInstr *, 16> SortedLoads(LoadInstrs.begin(),
-                                                    LoadInstrs.end());
-  llvm::sort(SortedLoads, [](const MachineInstr *A, const MachineInstr *B) {
-    if (A->getParent() != B->getParent()) {
-      // If in different blocks, this shouldn't happen for gather patterns.
-      return false;
-    }
-    // Compare positions within the same basic block.
-    for (const MachineInstr &MI : *A->getParent()) {
-      if (&MI == A)
-        return true;
-      if (&MI == B)
-        return false;
-    }
-    return false;
-  });
-
-  const MachineInstr *FirstLoad = SortedLoads.front();
-  const MachineInstr *LastLoad = SortedLoads.back();
-  SmallVector<const MachineInstr *, 16> MemOps = {};
-  getMemOps(FirstLoad, LastLoad, MemOps);
+  LoadInstrs.push_back(MRI.getUniqueVRegDef(Lane0LoadReg));
 
   // If there is any chance of aliasing, do not apply the pattern.
-  for (const MachineInstr *MemInstr : MemOps) {
-    if (mayAlias(*MemInstr, MemOps, nullptr))
+  // Walk backward through the MBB starting from Root.
+  // Exit early if we've encountered all load instructions or hit the search
+  // limit.
+  auto MBBItr = Root.getIterator();
+  unsigned RemainingSteps = GatherOptSearchLimit;
+  SmallSet<const MachineInstr *, 16> RemainingLoadInstrs;
+  RemainingLoadInstrs.insert(LoadInstrs.begin(), LoadInstrs.end());
+  const MachineBasicBlock *MBB = Root.getParent();
+
+  for (; MBBItr != MBB->begin() && RemainingSteps > 0 &&
+         !RemainingLoadInstrs.empty();
+       --MBBItr, --RemainingSteps) {
+    const MachineInstr &CurrInstr = *MBBItr;
+
+    // Remove this instruction from remaining loads if it's one we're tracking.
+    RemainingLoadInstrs.erase(&CurrInstr);
+
+    // Check for potential aliasing with any of the load instructions to
+    // optimize.
+    if ((CurrInstr.mayLoadOrStore() || CurrInstr.isCall()) &&
+        mayAlias(CurrInstr, LoadInstrs, nullptr))
       return false;
   }
 
+  // If we hit the search limit without finding all load instructions,
+  // don't match the pattern.
+  if (RemainingSteps == 0 && !RemainingLoadInstrs.empty())
+    return false;
+
   switch (NumLanes) {
   case 4:
     Patterns.push_back(AArch64MachineCombinerPattern::GATHER_LANE_i32);
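Stripped of the LLVM machinery, the new check is a bounded backward scan: start at an anchor, walk toward the front of the sequence, retire tracked elements as they are encountered, and give up if the step budget runs out first. The self-contained toy below mirrors that control flow, including both early exits; names such as `boundedBackwardScan` are illustrative, not from the patch.

    #include <cassert>
    #include <cstddef>
    #include <iterator>
    #include <list>
    #include <unordered_set>

    // Walk backward from `anchor`, retiring elements of `tracked` (taken by
    // value, mutated locally) as they are seen. Succeeds once `tracked` is
    // empty; fails if `budget` steps elapse first. Like the patch's loop,
    // the scan stops upon reaching begin() without examining that element.
    static bool boundedBackwardScan(const std::list<int> &seq,
                                    std::list<int>::const_iterator anchor,
                                    std::unordered_set<int> tracked,
                                    std::size_t budget) {
      for (auto it = anchor;
           it != seq.begin() && budget > 0 && !tracked.empty();
           --it, --budget) {
        tracked.erase(*it); // No-op unless *it is a tracked element.
        // The real pass additionally runs its aliasing check here and
        // returns false on a conflict.
      }
      return !(budget == 0 && !tracked.empty());
    }

    int main() {
      std::list<int> seq = {7, 3, 9, 3, 5, 1};
      auto anchor = std::prev(seq.end()); // Plays the role of Root.
      assert(boundedBackwardScan(seq, anchor, {9, 5}, /*budget=*/16));
      assert(!boundedBackwardScan(seq, anchor, {9, 5}, /*budget=*/1));
      return 0;
    }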