From 1cc855b7d79a8b942553d46eec8fe65968a50918 Mon Sep 17 00:00:00 2001 From: David Green Date: Thu, 8 May 2025 21:51:46 +0100 Subject: [PATCH 1/2] [DAGCombine] Fold icmp with chain of or of loads Given a `icmp eq/ne or(..), 0`, it is only checking that some of the bits are set. Given chains of ors of loads that are offset from one another, we can convert the loads to a single larger load. --- llvm/lib/CodeGen/SelectionDAG/DAGCombiner.cpp | 94 ++++++++++++++++++- llvm/test/CodeGen/AArch64/icmp-or-load.ll | 30 ++---- 2 files changed, 99 insertions(+), 25 deletions(-) diff --git a/llvm/lib/CodeGen/SelectionDAG/DAGCombiner.cpp b/llvm/lib/CodeGen/SelectionDAG/DAGCombiner.cpp index 564cc372f595c..25358fc80119e 100644 --- a/llvm/lib/CodeGen/SelectionDAG/DAGCombiner.cpp +++ b/llvm/lib/CodeGen/SelectionDAG/DAGCombiner.cpp @@ -9551,6 +9551,90 @@ SDValue DAGCombiner::MatchLoadCombine(SDNode *N) { return DAG.getNode(ISD::BSWAP, SDLoc(N), VT, ShiftedLoad); } +// Try to find a tree of or's with leafs that are all loads that are offset from +// the same base, and can be combined to a single larger load. +static SDValue MatchOrOfLoadToLargeLoad(SDValue Root, SelectionDAG &DAG, + const TargetLowering &TLI) { + EVT VT = Root.getValueType(); + SmallVector Worklist; + Worklist.push_back(Root); + SmallVector> Loads; + std::optional Base; + LoadSDNode *BaseLoad = nullptr; + + // Check up the chain of or instructions with loads at the end. + while (!Worklist.empty()) { + SDValue V = Worklist.pop_back_val(); + if (!V.hasOneUse()) + return SDValue(); + if (V.getOpcode() == ISD::OR) { + Worklist.push_back(V.getOperand(0)); + Worklist.push_back(V.getOperand(1)); + } else if (V.getOpcode() == ISD::ZERO_EXTEND || + V.getOpcode() == ISD::SIGN_EXTEND) { + Worklist.push_back(V.getOperand(0)); + } else if (V.getOpcode() == ISD::LOAD) { + LoadSDNode *Ld = cast(V.getNode()); + if (!Ld->isSimple() || Ld->getMemoryVT().getSizeInBits() % 8 != 0) + return SDValue(); + + BaseIndexOffset Ptr = BaseIndexOffset::match(Ld, DAG); + int64_t ByteOffsetFromBase = 0; + if (!Base) { + Base = Ptr; + BaseLoad = Ld; + } else if (BaseLoad->getChain() != Ld->getChain() || + !Base->equalBaseIndex(Ptr, DAG, ByteOffsetFromBase)) + return SDValue(); + Loads.push_back({Ld, ByteOffsetFromBase}); + } else { + return SDValue(); + } + } + + // Sort nodes by increasing ByteOffsetFromBase + llvm::sort(Loads, [](auto &A, auto &B) { return A.second < B.second; }); + Base = BaseIndexOffset::match(Loads[0].first, DAG); + + // Check that they are all adjacent in memory + int64_t BaseOffset = 0; + for (unsigned I = 0; I < Loads.size(); ++I) { + int64_t Offset = Loads[I].second - Loads[0].second; + if (Offset != BaseOffset) + return SDValue(); + BaseOffset += Loads[I].first->getMemoryVT().getSizeInBits() / 8; + } + + uint64_t MemSize = + Loads[Loads.size() - 1].second - Loads[0].second + + Loads[Loads.size() - 1].first->getMemoryVT().getSizeInBits() / 8; + if (!isPowerOf2_64(MemSize) || MemSize * 8 > VT.getSizeInBits()) + return SDValue(); + EVT MemVT = EVT::getIntegerVT(*DAG.getContext(), MemSize * 8); + + bool NeedsZext = VT.bitsGT(MemVT); + if (!TLI.isLoadExtLegal(NeedsZext ? ISD::ZEXTLOAD : ISD::NON_EXTLOAD, VT, + MemVT)) + return SDValue(); + + unsigned Fast = 0; + bool Allowed = + TLI.allowsMemoryAccess(*DAG.getContext(), DAG.getDataLayout(), MemVT, + *Loads[0].first->getMemOperand(), &Fast); + if (!Allowed || !Fast) + return SDValue(); + + SDValue NewLoad = DAG.getExtLoad( + NeedsZext ? ISD::ZEXTLOAD : ISD::NON_EXTLOAD, SDLoc(Root), VT, + Loads[0].first->getChain(), Loads[0].first->getBasePtr(), + Loads[0].first->getPointerInfo(), MemVT, Loads[0].first->getAlign()); + + // Transfer chain users from old loads to the new load. + for (auto &L : Loads) + DAG.makeEquivalentMemoryOrdering(L.first, NewLoad); + return NewLoad; +} + // If the target has andn, bsl, or a similar bit-select instruction, // we want to unfold masked merge, with canonical pattern of: // | A | |B| @@ -28649,7 +28733,15 @@ SDValue DAGCombiner::SimplifySetCC(EVT VT, SDValue N0, SDValue N1, bool foldBooleans) { TargetLowering::DAGCombinerInfo DagCombineInfo(DAG, Level, false, this); - return TLI.SimplifySetCC(VT, N0, N1, Cond, foldBooleans, DagCombineInfo, DL); + if (SDValue C = + TLI.SimplifySetCC(VT, N0, N1, Cond, foldBooleans, DagCombineInfo, DL)) + return C; + + if ((Cond == ISD::SETNE || Cond == ISD::SETEQ) && isNullConstant(N1) && + N0.getOpcode() == ISD::OR) + if (SDValue Load = MatchOrOfLoadToLargeLoad(N0, DAG, TLI)) + return DAG.getSetCC(DL, VT, Load, N1, Cond); + return SDValue(); } /// Given an ISD::SDIV node expressing a divide by constant, return diff --git a/llvm/test/CodeGen/AArch64/icmp-or-load.ll b/llvm/test/CodeGen/AArch64/icmp-or-load.ll index 64db154c9b2c2..36862a92e8a17 100644 --- a/llvm/test/CodeGen/AArch64/icmp-or-load.ll +++ b/llvm/test/CodeGen/AArch64/icmp-or-load.ll @@ -4,9 +4,7 @@ define i1 @loadzext_i8i8(ptr %p) { ; CHECK-LABEL: loadzext_i8i8: ; CHECK: // %bb.0: -; CHECK-NEXT: ldrb w8, [x0] -; CHECK-NEXT: ldrb w9, [x0, #1] -; CHECK-NEXT: orr w8, w8, w9 +; CHECK-NEXT: ldrh w8, [x0] ; CHECK-NEXT: cmp w8, #0 ; CHECK-NEXT: cset w0, eq ; CHECK-NEXT: ret @@ -23,9 +21,7 @@ define i1 @loadzext_i8i8(ptr %p) { define i1 @loadzext_c_i8i8(ptr %p) { ; CHECK-LABEL: loadzext_c_i8i8: ; CHECK: // %bb.0: -; CHECK-NEXT: ldrb w8, [x0] -; CHECK-NEXT: ldrb w9, [x0, #1] -; CHECK-NEXT: orr w8, w9, w8 +; CHECK-NEXT: ldrh w8, [x0] ; CHECK-NEXT: cmp w8, #0 ; CHECK-NEXT: cset w0, eq ; CHECK-NEXT: ret @@ -85,13 +81,7 @@ define i1 @loadzext_i8i8i8(ptr %p) { define i1 @loadzext_i8i8i8i8(ptr %p) { ; CHECK-LABEL: loadzext_i8i8i8i8: ; CHECK: // %bb.0: -; CHECK-NEXT: ldrb w8, [x0] -; CHECK-NEXT: ldrb w9, [x0, #1] -; CHECK-NEXT: ldrb w10, [x0, #2] -; CHECK-NEXT: ldrb w11, [x0, #3] -; CHECK-NEXT: orr w8, w8, w9 -; CHECK-NEXT: orr w9, w10, w11 -; CHECK-NEXT: orr w8, w8, w9 +; CHECK-NEXT: ldr w8, [x0] ; CHECK-NEXT: cmp w8, #0 ; CHECK-NEXT: cset w0, eq ; CHECK-NEXT: ret @@ -116,9 +106,7 @@ define i1 @loadzext_i8i8i8i8(ptr %p) { define i1 @load_i8i8(ptr %p) { ; CHECK-LABEL: load_i8i8: ; CHECK: // %bb.0: -; CHECK-NEXT: ldrb w8, [x0] -; CHECK-NEXT: ldrb w9, [x0, #1] -; CHECK-NEXT: orr w8, w8, w9 +; CHECK-NEXT: ldrh w8, [x0] ; CHECK-NEXT: cmp w8, #0 ; CHECK-NEXT: cset w0, eq ; CHECK-NEXT: ret @@ -133,9 +121,7 @@ define i1 @load_i8i8(ptr %p) { define i1 @load_i16i16(ptr %p) { ; CHECK-LABEL: load_i16i16: ; CHECK: // %bb.0: -; CHECK-NEXT: ldrh w8, [x0] -; CHECK-NEXT: ldrh w9, [x0, #2] -; CHECK-NEXT: orr w8, w8, w9 +; CHECK-NEXT: ldr w8, [x0] ; CHECK-NEXT: cmp w8, #0 ; CHECK-NEXT: cset w0, eq ; CHECK-NEXT: ret @@ -182,11 +168,7 @@ define i1 @load_i64i64(ptr %p) { define i1 @load_i8i16i8(ptr %p) { ; CHECK-LABEL: load_i8i16i8: ; CHECK: // %bb.0: -; CHECK-NEXT: ldrb w8, [x0] -; CHECK-NEXT: ldrb w9, [x0, #3] -; CHECK-NEXT: ldurh w10, [x0, #1] -; CHECK-NEXT: orr w8, w8, w9 -; CHECK-NEXT: orr w8, w8, w10 +; CHECK-NEXT: ldr w8, [x0] ; CHECK-NEXT: cmp w8, #0 ; CHECK-NEXT: cset w0, eq ; CHECK-NEXT: ret From 285cc8904e8c69e4deb6dc56af34c5e2da8f35e1 Mon Sep 17 00:00:00 2001 From: David Green Date: Fri, 9 May 2025 11:42:01 +0100 Subject: [PATCH 2/2] Updates for review comments --- llvm/lib/CodeGen/SelectionDAG/DAGCombiner.cpp | 11 +++++------ 1 file changed, 5 insertions(+), 6 deletions(-) diff --git a/llvm/lib/CodeGen/SelectionDAG/DAGCombiner.cpp b/llvm/lib/CodeGen/SelectionDAG/DAGCombiner.cpp index 25358fc80119e..48605b962d090 100644 --- a/llvm/lib/CodeGen/SelectionDAG/DAGCombiner.cpp +++ b/llvm/lib/CodeGen/SelectionDAG/DAGCombiner.cpp @@ -9553,11 +9553,10 @@ SDValue DAGCombiner::MatchLoadCombine(SDNode *N) { // Try to find a tree of or's with leafs that are all loads that are offset from // the same base, and can be combined to a single larger load. -static SDValue MatchOrOfLoadToLargeLoad(SDValue Root, SelectionDAG &DAG, +static SDValue matchOrOfLoadToLargeLoad(SDValue Root, SelectionDAG &DAG, const TargetLowering &TLI) { EVT VT = Root.getValueType(); - SmallVector Worklist; - Worklist.push_back(Root); + SmallVector Worklist = {Root}; SmallVector> Loads; std::optional Base; LoadSDNode *BaseLoad = nullptr; @@ -28737,9 +28736,9 @@ SDValue DAGCombiner::SimplifySetCC(EVT VT, SDValue N0, SDValue N1, TLI.SimplifySetCC(VT, N0, N1, Cond, foldBooleans, DagCombineInfo, DL)) return C; - if ((Cond == ISD::SETNE || Cond == ISD::SETEQ) && isNullConstant(N1) && - N0.getOpcode() == ISD::OR) - if (SDValue Load = MatchOrOfLoadToLargeLoad(N0, DAG, TLI)) + if ((Cond == ISD::SETNE || Cond == ISD::SETEQ) && N0.getOpcode() == ISD::OR && + isNullConstant(N1)) + if (SDValue Load = matchOrOfLoadToLargeLoad(N0, DAG, TLI)) return DAG.getSetCC(DL, VT, Load, N1, Cond); return SDValue(); }