@@ -149,6 +149,11 @@ MVEMaxSupportedInterleaveFactor("mve-max-interleave-factor", cl::Hidden,
149149 cl::desc("Maximum interleave factor for MVE VLDn to generate."),
150150 cl::init(2));
151151
152+ cl::opt<unsigned> ArmMaxBaseUpdatesToCheck(
153+ "arm-max-base-updates-to-check", cl::Hidden,
154+ cl::desc("Maximum number of base-updates to check generating postindex."),
155+ cl::init(64));
156+
152157/// Value type used for "flags" operands / results (either CPSR or FPSCR_NZCV).
153158constexpr MVT FlagsVT = MVT::i32;
154159
@@ -15865,6 +15870,22 @@ struct BaseUpdateUser {
1586515870 unsigned ConstInc;
1586615871};
1586715872
15873+ static bool isValidBaseUpdate(SDNode *N, SDNode *User) {
15874+ // Check that the add is independent of the load/store.
15875+ // Otherwise, folding it would create a cycle. Search through Addr
15876+ // as well, since the User may not be a direct user of Addr and
15877+ // only share a base pointer.
15878+ SmallPtrSet<const SDNode *, 32> Visited;
15879+ SmallVector<const SDNode *, 16> Worklist;
15880+ Worklist.push_back(N);
15881+ Worklist.push_back(User);
15882+ const unsigned MaxSteps = 1024;
15883+ if (SDNode::hasPredecessorHelper(N, Visited, Worklist, MaxSteps) ||
15884+ SDNode::hasPredecessorHelper(User, Visited, Worklist, MaxSteps))
15885+ return false;
15886+ return true;
15887+ }
15888+
1586815889static bool TryCombineBaseUpdate(struct BaseUpdateTarget &Target,
1586915890 struct BaseUpdateUser &User,
1587015891 bool SimpleConstIncOnly,
@@ -16066,6 +16087,9 @@ static bool TryCombineBaseUpdate(struct BaseUpdateTarget &Target,
1606616087 if (SimpleConstIncOnly && User.ConstInc != NumBytes)
1606716088 return false;
1606816089
16090+ if (!isValidBaseUpdate(N, User.N))
16091+ return false;
16092+
1606916093 // OK, we found an ADD we can fold into the base update.
1607016094 // Now, create a _UPD node, taking care of not breaking alignment.
1607116095
@@ -16214,21 +16238,6 @@ static bool findPointerConstIncrement(SDNode *N, SDValue *Ptr, SDValue *CInc) {
1621416238 }
1621516239}
1621616240
16217- static bool isValidBaseUpdate(SDNode *N, SDNode *User) {
16218- // Check that the add is independent of the load/store.
16219- // Otherwise, folding it would create a cycle. Search through Addr
16220- // as well, since the User may not be a direct user of Addr and
16221- // only share a base pointer.
16222- SmallPtrSet<const SDNode *, 32> Visited;
16223- SmallVector<const SDNode *, 16> Worklist;
16224- Worklist.push_back(N);
16225- Worklist.push_back(User);
16226- if (SDNode::hasPredecessorHelper(N, Visited, Worklist) ||
16227- SDNode::hasPredecessorHelper(User, Visited, Worklist))
16228- return false;
16229- return true;
16230- }
16231-
1623216241/// CombineBaseUpdate - Target-specific DAG combine function for VLDDUP,
1623316242/// NEON load/store intrinsics, and generic vector load/stores, to merge
1623416243/// base address updates.
@@ -16242,6 +16251,10 @@ static SDValue CombineBaseUpdate(SDNode *N,
1624216251 const unsigned AddrOpIdx = ((isIntrinsic || isStore) ? 2 : 1);
1624316252 BaseUpdateTarget Target = {N, isIntrinsic, isStore, AddrOpIdx};
1624416253
16254+ // Limit the number of possible base-updates we look at to prevent degenerate
16255+ // cases.
16256+ unsigned MaxBaseUpdates = ArmMaxBaseUpdatesToCheck;
16257+
1624516258 SDValue Addr = N->getOperand(AddrOpIdx);
1624616259
1624716260 SmallVector<BaseUpdateUser, 8> BaseUpdates;
@@ -16256,8 +16269,11 @@ static SDValue CombineBaseUpdate(SDNode *N,
1625616269 unsigned ConstInc =
1625716270 getPointerConstIncrement(User->getOpcode(), Addr, Inc, DCI.DAG);
1625816271
16259- if (ConstInc || User->getOpcode() == ISD::ADD)
16272+ if (ConstInc || User->getOpcode() == ISD::ADD) {
1626016273 BaseUpdates.push_back({User, Inc, ConstInc});
16274+ if (BaseUpdates.size() >= MaxBaseUpdates)
16275+ break;
16276+ }
1626116277 }
1626216278
1626316279 // If the address is a constant pointer increment itself, find
@@ -16284,27 +16300,19 @@ static SDValue CombineBaseUpdate(SDNode *N,
1628416300 unsigned NewConstInc = UserOffset - Offset;
1628516301 SDValue NewInc = DCI.DAG.getConstant(NewConstInc, SDLoc(N), MVT::i32);
1628616302 BaseUpdates.push_back({User, NewInc, NewConstInc});
16303+ if (BaseUpdates.size() >= MaxBaseUpdates)
16304+ break;
1628716305 }
1628816306 }
1628916307
1629016308 // Try to fold the load/store with an update that matches memory
1629116309 // access size. This should work well for sequential loads.
16292- //
16293- // Filter out invalid updates as well.
1629416310 unsigned NumValidUpd = BaseUpdates.size();
16295- for (unsigned I = 0; I < NumValidUpd;) {
16311+ for (unsigned I = 0; I < NumValidUpd; I++ ) {
1629616312 BaseUpdateUser &User = BaseUpdates[I];
16297- if (!isValidBaseUpdate(N, User.N)) {
16298- --NumValidUpd;
16299- std::swap(BaseUpdates[I], BaseUpdates[NumValidUpd]);
16300- continue;
16301- }
16302-
1630316313 if (TryCombineBaseUpdate(Target, User, /*SimpleConstIncOnly=*/true, DCI))
1630416314 return SDValue();
16305- ++I;
1630616315 }
16307- BaseUpdates.resize(NumValidUpd);
1630816316
1630916317 // Try to fold with other users. Non-constant updates are considered
1631016318 // first, and constant updates are sorted to not break a sequence of
@@ -16360,8 +16368,9 @@ static SDValue PerformMVEVLDCombine(SDNode *N,
1636016368 Visited.insert(Addr.getNode());
1636116369 Worklist.push_back(N);
1636216370 Worklist.push_back(User);
16363- if (SDNode::hasPredecessorHelper(N, Visited, Worklist) ||
16364- SDNode::hasPredecessorHelper(User, Visited, Worklist))
16371+ const unsigned MaxSteps = 1024;
16372+ if (SDNode::hasPredecessorHelper(N, Visited, Worklist, MaxSteps) ||
16373+ SDNode::hasPredecessorHelper(User, Visited, Worklist, MaxSteps))
1636516374 continue;
1636616375
1636716376 // Find the new opcode for the updating load/store.
0 commit comments