@@ -149,6 +149,11 @@ MVEMaxSupportedInterleaveFactor("mve-max-interleave-factor", cl::Hidden,
149149 cl::desc("Maximum interleave factor for MVE VLDn to generate."),
150150 cl::init(2));
151151
152+ cl::opt<unsigned> ArmMaxBaseUpdatesToCheck(
153+ "arm-max-base-updates-to-check", cl::Hidden,
154+ cl::desc("Maximum number of base-updates to check generating postindex."),
155+ cl::init(64));
156+
152157/// Value type used for "flags" operands / results (either CPSR or FPSCR_NZCV).
153158constexpr MVT FlagsVT = MVT::i32;
154159
@@ -15865,6 +15870,21 @@ struct BaseUpdateUser {
1586515870 unsigned ConstInc;
1586615871};
1586715872
15873+ static bool isValidBaseUpdate(SDNode *N, SDNode *User) {
15874+ // Check that the add is independent of the load/store.
15875+ // Otherwise, folding it would create a cycle. Search through Addr
15876+ // as well, since the User may not be a direct user of Addr and
15877+ // only share a base pointer.
15878+ SmallPtrSet<const SDNode *, 32> Visited;
15879+ SmallVector<const SDNode *, 16> Worklist;
15880+ Worklist.push_back(N);
15881+ Worklist.push_back(User);
15882+ if (SDNode::hasPredecessorHelper(N, Visited, Worklist, 1024) ||
15883+ SDNode::hasPredecessorHelper(User, Visited, Worklist, 1024))
15884+ return false;
15885+ return true;
15886+ }
15887+
1586815888static bool TryCombineBaseUpdate(struct BaseUpdateTarget &Target,
1586915889 struct BaseUpdateUser &User,
1587015890 bool SimpleConstIncOnly,
@@ -16066,6 +16086,9 @@ static bool TryCombineBaseUpdate(struct BaseUpdateTarget &Target,
1606616086 if (SimpleConstIncOnly && User.ConstInc != NumBytes)
1606716087 return false;
1606816088
16089+ if (!isValidBaseUpdate(N, User.N))
16090+ return false;
16091+
1606916092 // OK, we found an ADD we can fold into the base update.
1607016093 // Now, create a _UPD node, taking care of not breaking alignment.
1607116094
@@ -16214,21 +16237,6 @@ static bool findPointerConstIncrement(SDNode *N, SDValue *Ptr, SDValue *CInc) {
1621416237 }
1621516238}
1621616239
16217- static bool isValidBaseUpdate(SDNode *N, SDNode *User) {
16218- // Check that the add is independent of the load/store.
16219- // Otherwise, folding it would create a cycle. Search through Addr
16220- // as well, since the User may not be a direct user of Addr and
16221- // only share a base pointer.
16222- SmallPtrSet<const SDNode *, 32> Visited;
16223- SmallVector<const SDNode *, 16> Worklist;
16224- Worklist.push_back(N);
16225- Worklist.push_back(User);
16226- if (SDNode::hasPredecessorHelper(N, Visited, Worklist) ||
16227- SDNode::hasPredecessorHelper(User, Visited, Worklist))
16228- return false;
16229- return true;
16230- }
16231-
1623216240/// CombineBaseUpdate - Target-specific DAG combine function for VLDDUP,
1623316241/// NEON load/store intrinsics, and generic vector load/stores, to merge
1623416242/// base address updates.
@@ -16242,6 +16250,10 @@ static SDValue CombineBaseUpdate(SDNode *N,
1624216250 const unsigned AddrOpIdx = ((isIntrinsic || isStore) ? 2 : 1);
1624316251 BaseUpdateTarget Target = {N, isIntrinsic, isStore, AddrOpIdx};
1624416252
16253+ // Limit the number of possible base-updates we look at to prevent degenerate
16254+ // cases.
16255+ unsigned MaxBaseUpdates = ArmMaxBaseUpdatesToCheck;
16256+
1624516257 SDValue Addr = N->getOperand(AddrOpIdx);
1624616258
1624716259 SmallVector<BaseUpdateUser, 8> BaseUpdates;
@@ -16256,8 +16268,11 @@ static SDValue CombineBaseUpdate(SDNode *N,
1625616268 unsigned ConstInc =
1625716269 getPointerConstIncrement(User->getOpcode(), Addr, Inc, DCI.DAG);
1625816270
16259- if (ConstInc || User->getOpcode() == ISD::ADD)
16271+ if (ConstInc || User->getOpcode() == ISD::ADD) {
1626016272 BaseUpdates.push_back({User, Inc, ConstInc});
16273+ if (BaseUpdates.size() > MaxBaseUpdates)
16274+ break;
16275+ }
1626116276 }
1626216277
1626316278 // If the address is a constant pointer increment itself, find
@@ -16284,27 +16299,19 @@ static SDValue CombineBaseUpdate(SDNode *N,
1628416299 unsigned NewConstInc = UserOffset - Offset;
1628516300 SDValue NewInc = DCI.DAG.getConstant(NewConstInc, SDLoc(N), MVT::i32);
1628616301 BaseUpdates.push_back({User, NewInc, NewConstInc});
16302+ if (BaseUpdates.size() > MaxBaseUpdates)
16303+ break;
1628716304 }
1628816305 }
1628916306
1629016307 // Try to fold the load/store with an update that matches memory
1629116308 // access size. This should work well for sequential loads.
16292- //
16293- // Filter out invalid updates as well.
1629416309 unsigned NumValidUpd = BaseUpdates.size();
16295- for (unsigned I = 0; I < NumValidUpd;) {
16310+ for (unsigned I = 0; I < NumValidUpd; I++) {
1629616311 BaseUpdateUser &User = BaseUpdates[I];
16297- if (!isValidBaseUpdate(N, User.N)) {
16298- --NumValidUpd;
16299- std::swap(BaseUpdates[I], BaseUpdates[NumValidUpd]);
16300- continue;
16301- }
16302-
1630316312 if (TryCombineBaseUpdate(Target, User, /*SimpleConstIncOnly=*/true, DCI))
1630416313 return SDValue();
16305- ++I;
1630616314 }
16307- BaseUpdates.resize(NumValidUpd);
1630816315
1630916316 // Try to fold with other users. Non-constant updates are considered
1631016317 // first, and constant updates are sorted to not break a sequence of
@@ -16360,8 +16367,8 @@ static SDValue PerformMVEVLDCombine(SDNode *N,
1636016367 Visited.insert(Addr.getNode());
1636116368 Worklist.push_back(N);
1636216369 Worklist.push_back(User);
16363- if (SDNode::hasPredecessorHelper(N, Visited, Worklist) ||
16364- SDNode::hasPredecessorHelper(User, Visited, Worklist))
16370+ if (SDNode::hasPredecessorHelper(N, Visited, Worklist, 1024) ||
16371+ SDNode::hasPredecessorHelper(User, Visited, Worklist, 1024))
1636516372 continue;
1636616373
1636716374 // Find the new opcode for the updating load/store.
0 commit comments