@@ -9551,6 +9551,90 @@ SDValue DAGCombiner::MatchLoadCombine(SDNode *N) {
95519551 return DAG.getNode(ISD::BSWAP, SDLoc(N), VT, ShiftedLoad);
95529552}
95539553
9554+ // Try to find a tree of or's with leafs that are all loads that are offset from
9555+ // the same base, and can be combined to a single larger load.
9556+ static SDValue MatchOrOfLoadToLargeLoad(SDValue Root, SelectionDAG &DAG,
9557+ const TargetLowering &TLI) {
9558+ EVT VT = Root.getValueType();
9559+ SmallVector<SDValue> Worklist;
9560+ Worklist.push_back(Root);
9561+ SmallVector<std::pair<LoadSDNode *, int64_t>> Loads;
9562+ std::optional<BaseIndexOffset> Base;
9563+ LoadSDNode *BaseLoad = nullptr;
9564+
9565+ // Check up the chain of or instructions with loads at the end.
9566+ while (!Worklist.empty()) {
9567+ SDValue V = Worklist.pop_back_val();
9568+ if (!V.hasOneUse())
9569+ return SDValue();
9570+ if (V.getOpcode() == ISD::OR) {
9571+ Worklist.push_back(V.getOperand(0));
9572+ Worklist.push_back(V.getOperand(1));
9573+ } else if (V.getOpcode() == ISD::ZERO_EXTEND ||
9574+ V.getOpcode() == ISD::SIGN_EXTEND) {
9575+ Worklist.push_back(V.getOperand(0));
9576+ } else if (V.getOpcode() == ISD::LOAD) {
9577+ LoadSDNode *Ld = cast<LoadSDNode>(V.getNode());
9578+ if (!Ld->isSimple() || Ld->getMemoryVT().getSizeInBits() % 8 != 0)
9579+ return SDValue();
9580+
9581+ BaseIndexOffset Ptr = BaseIndexOffset::match(Ld, DAG);
9582+ int64_t ByteOffsetFromBase = 0;
9583+ if (!Base) {
9584+ Base = Ptr;
9585+ BaseLoad = Ld;
9586+ } else if (BaseLoad->getChain() != Ld->getChain() ||
9587+ !Base->equalBaseIndex(Ptr, DAG, ByteOffsetFromBase))
9588+ return SDValue();
9589+ Loads.push_back({Ld, ByteOffsetFromBase});
9590+ } else {
9591+ return SDValue();
9592+ }
9593+ }
9594+
9595+ // Sort nodes by increasing ByteOffsetFromBase
9596+ llvm::sort(Loads, [](auto &A, auto &B) { return A.second < B.second; });
9597+ Base = BaseIndexOffset::match(Loads[0].first, DAG);
9598+
9599+ // Check that they are all adjacent in memory
9600+ int64_t BaseOffset = 0;
9601+ for (unsigned I = 0; I < Loads.size(); ++I) {
9602+ int64_t Offset = Loads[I].second - Loads[0].second;
9603+ if (Offset != BaseOffset)
9604+ return SDValue();
9605+ BaseOffset += Loads[I].first->getMemoryVT().getSizeInBits() / 8;
9606+ }
9607+
9608+ uint64_t MemSize =
9609+ Loads[Loads.size() - 1].second - Loads[0].second +
9610+ Loads[Loads.size() - 1].first->getMemoryVT().getSizeInBits() / 8;
9611+ if (!isPowerOf2_64(MemSize) || MemSize * 8 > VT.getSizeInBits())
9612+ return SDValue();
9613+ EVT MemVT = EVT::getIntegerVT(*DAG.getContext(), MemSize * 8);
9614+
9615+ bool NeedsZext = VT.bitsGT(MemVT);
9616+ if (!TLI.isLoadExtLegal(NeedsZext ? ISD::ZEXTLOAD : ISD::NON_EXTLOAD, VT,
9617+ MemVT))
9618+ return SDValue();
9619+
9620+ unsigned Fast = 0;
9621+ bool Allowed =
9622+ TLI.allowsMemoryAccess(*DAG.getContext(), DAG.getDataLayout(), MemVT,
9623+ *Loads[0].first->getMemOperand(), &Fast);
9624+ if (!Allowed || !Fast)
9625+ return SDValue();
9626+
9627+ SDValue NewLoad = DAG.getExtLoad(
9628+ NeedsZext ? ISD::ZEXTLOAD : ISD::NON_EXTLOAD, SDLoc(Root), VT,
9629+ Loads[0].first->getChain(), Loads[0].first->getBasePtr(),
9630+ Loads[0].first->getPointerInfo(), MemVT, Loads[0].first->getAlign());
9631+
9632+ // Transfer chain users from old loads to the new load.
9633+ for (auto &L : Loads)
9634+ DAG.makeEquivalentMemoryOrdering(L.first, NewLoad);
9635+ return NewLoad;
9636+ }
9637+
95549638// If the target has andn, bsl, or a similar bit-select instruction,
95559639// we want to unfold masked merge, with canonical pattern of:
95569640// | A | |B|
@@ -28649,7 +28733,15 @@ SDValue DAGCombiner::SimplifySetCC(EVT VT, SDValue N0, SDValue N1,
2864928733 bool foldBooleans) {
2865028734 TargetLowering::DAGCombinerInfo
2865128735 DagCombineInfo(DAG, Level, false, this);
28652- return TLI.SimplifySetCC(VT, N0, N1, Cond, foldBooleans, DagCombineInfo, DL);
28736+ if (SDValue C =
28737+ TLI.SimplifySetCC(VT, N0, N1, Cond, foldBooleans, DagCombineInfo, DL))
28738+ return C;
28739+
28740+ if ((Cond == ISD::SETNE || Cond == ISD::SETEQ) && isNullConstant(N1) &&
28741+ N0.getOpcode() == ISD::OR)
28742+ if (SDValue Load = MatchOrOfLoadToLargeLoad(N0, DAG, TLI))
28743+ return DAG.getSetCC(DL, VT, Load, N1, Cond);
28744+ return SDValue();
2865328745}
2865428746
2865528747/// Given an ISD::SDIV node expressing a divide by constant, return
0 commit comments