From 32447fed3a3e4d51b57927cd56c20d323222a5e2 Mon Sep 17 00:00:00 2001 From: hanbeom Date: Tue, 29 Apr 2025 04:33:08 +0900 Subject: [PATCH] [GVN] Merge identical critical edge split blocks from the same predecessor GVN's PerformLoadPRE function split the CriticalEdge to copy the load if it could not hoist the load on the CriticalEdge. This was not supported if more than one splitted CriticalEdge was needed, as it would unnecessarily duplicate the load. This patch fixes this issue by adding the function mergeSplitedCriticalEdges. It identifies a group of identical edge-split blocks that have branched from the same predecessor and merges them into a single block. Modify the branch target of the predecessors to this merged block and remove other blocks. With this patch, even if it has multiple CriticalEdges that cannot hoist a load, if they can be merged, it can suppose optimisation opportunity. Proof: https://alive2.llvm.org/ce/z/-2rLLQ --- llvm/include/llvm/Transforms/Scalar/GVN.h | 3 + llvm/lib/Transforms/Scalar/GVN.cpp | 81 ++++++++++++++++- llvm/test/Transforms/GVN/PRE/pre-load.ll | 106 +++++++++++++++------- 3 files changed, 153 insertions(+), 37 deletions(-) diff --git a/llvm/include/llvm/Transforms/Scalar/GVN.h b/llvm/include/llvm/Transforms/Scalar/GVN.h index bc0f108ac8260..85cd834270a27 100644 --- a/llvm/include/llvm/Transforms/Scalar/GVN.h +++ b/llvm/include/llvm/Transforms/Scalar/GVN.h @@ -398,6 +398,9 @@ class GVNPass : public PassInfoMixin { void verifyRemoved(const Instruction *I) const; bool splitCriticalEdges(); BasicBlock *splitCriticalEdges(BasicBlock *Pred, BasicBlock *Succ); + void + mergeSplitedCriticalEdges(SmallVectorImpl &SplitedCriticalEdges, + MapVector &PredLoad); bool propagateEquality(Value *LHS, Value *RHS, const std::variant &Root); diff --git a/llvm/lib/Transforms/Scalar/GVN.cpp b/llvm/lib/Transforms/Scalar/GVN.cpp index 72e1131a54a86..c895b93acbf69 100644 --- a/llvm/lib/Transforms/Scalar/GVN.cpp +++ b/llvm/lib/Transforms/Scalar/GVN.cpp @@ -1646,6 +1646,56 @@ void GVNPass::eliminatePartiallyRedundantLoad( salvageAndRemoveInstruction(Load); } +/// Merges splited critical edge blocks that originate from the same +/// predecessor. +void GVNPass::mergeSplitedCriticalEdges( + SmallVectorImpl &SplitedCriticalEdges, + MapVector &PredLoad) { + if (SplitedCriticalEdges.size() <= 1) + return; + + MapVector> PredEdgeMap; + // Group the splited edge blocks based on their predecessor (Pred). + // Example: Pred1 -> {Edge1, Edge2}, Pred2 -> {Edge3} + for (BasicBlock *Edge : SplitedCriticalEdges) { + // A splited edge block must have exactly one predecessor. + assert(Edge->getSinglePredecessor() && + "Splited edge block should have a single predecessor"); + auto *Pred = Edge->getSinglePredecessor(); + PredEdgeMap[Pred].push_back(Edge); + } + + // Iterate over the grouped edge blocks to perform the merge. + for (auto &PredEdgeEl : PredEdgeMap) { + BasicBlock *Pred = PredEdgeEl.first; + SmallVector &BBs = PredEdgeEl.second; + if (BBs.size() <= 1) + continue; + + // Select the first edge block as the representative block that will + // remain after merging. + BasicBlock *MergedBlock = BBs[0]; + for (BasicBlock *BB : llvm::drop_begin(BBs)) { + // Redirect all jumps to this block (BB) from its predecessor (which + // should only be Pred) to the MergedBlock. + Instruction *PredTI = Pred->getTerminator(); + for (unsigned I = 0, E = PredTI->getNumSuccessors(); I < E; ++I) { + if (PredTI->getSuccessor(I) == BB) { + PredTI->setSuccessor(I, MergedBlock); + LLVM_DEBUG(dbgs() << " Redirected successor in " << Pred->getName() + << " from " << BB->getName() << " to " + << MergedBlock->getName() << "\n"); + } + } + // Remove the block from the SplitedCriticalEdges list as well + auto it = find(SplitedCriticalEdges, BB); + SplitedCriticalEdges.erase(it); + DeleteDeadBlock(BB); + } + PredLoad[MergedBlock] = nullptr; + } +} + bool GVNPass::PerformLoadPRE(LoadInst *Load, AvailValInBlkVect &ValuesPerBlock, UnavailBlkVect &UnavailableBlocks) { // Okay, we have *some* definitions of the value. This means that the value @@ -1769,9 +1819,10 @@ bool GVNPass::PerformLoadPRE(LoadInst *Load, AvailValInBlkVect &ValuesPerBlock, } // Decide whether PRE is profitable for this load. - unsigned NumInsertPreds = PredLoads.size() + CriticalEdgePredSplit.size(); + unsigned NumInsertPreds = PredLoads.size(); unsigned NumUnavailablePreds = NumInsertPreds + - CriticalEdgePredAndLoad.size(); + CriticalEdgePredAndLoad.size() + + CriticalEdgePredSplit.size(); assert(NumUnavailablePreds != 0 && "Fully available value should already be eliminated!"); (void)NumUnavailablePreds; @@ -1800,14 +1851,36 @@ bool GVNPass::PerformLoadPRE(LoadInst *Load, AvailValInBlkVect &ValuesPerBlock, return false; } - // Split critical edges, and update the unavailable predecessors accordingly. + // Verify that all successors of the predecessor (Pred) are included in the + // current group (BBs). If Pred has a successor that is not in BBs, merging + // these blocks could make the Pred -> (block not in BBs) edge critical again. + // If there is at least one CriticalEdgePredSplit that cannot be merged, it + // must be rejected because it would require inserting new loads into multiple + // predecessors. + if (CriticalEdgePredSplit.size() > 1 - PredLoads.size()) { + for (BasicBlock *OrigPred : CriticalEdgePredSplit) { + auto *PredTI = OrigPred->getTerminator(); + for (unsigned i = 0, e = PredTI->getNumSuccessors(); i < e - 1; ++i) + if (PredTI->getSuccessor(i) != PredTI->getSuccessor(i + 1)) + return false; + } + } + + // The edge from Pred to LoadBB is a critical edge will be splitted. + SmallVector SplitedCriticalEdges; for (BasicBlock *OrigPred : CriticalEdgePredSplit) { BasicBlock *NewPred = splitCriticalEdges(OrigPred, LoadBB); + SplitedCriticalEdges.push_back(NewPred); assert(!PredLoads.count(OrigPred) && "Split edges shouldn't be in map!"); - PredLoads[NewPred] = nullptr; LLVM_DEBUG(dbgs() << "Split critical edge " << OrigPred->getName() << "->" << LoadBB->getName() << '\n'); } + // Attempts to merge the blocks created by splitting the CriticalEdges. The + // merged blocks are removed from SplitedCriticalEdges. + mergeSplitedCriticalEdges(SplitedCriticalEdges, PredLoads); + // Add the unmerged blocks separately. + for (auto BB : SplitedCriticalEdges) + PredLoads[BB] = nullptr; for (auto &CEP : CriticalEdgePredAndLoad) PredLoads[CEP.first] = nullptr; diff --git a/llvm/test/Transforms/GVN/PRE/pre-load.ll b/llvm/test/Transforms/GVN/PRE/pre-load.ll index afa13549db458..889e66a80dc1f 100644 --- a/llvm/test/Transforms/GVN/PRE/pre-load.ll +++ b/llvm/test/Transforms/GVN/PRE/pre-load.ll @@ -831,7 +831,7 @@ define void @test12(ptr %p) personality ptr @__CxxFrameHandler3 { ; CHECK: block3: ; CHECK-NEXT: ret void ; CHECK: catch.dispatch: -; CHECK-NEXT: [[CS1:%.*]] = catchswitch within none [label %catch] unwind label [[CLEANUP2:%.*]] +; CHECK-NEXT: [[CS1:%.*]] = catchswitch within none [label [[CATCH:%.*]]] unwind label [[CLEANUP2:%.*]] ; CHECK: catch: ; CHECK-NEXT: [[C:%.*]] = catchpad within [[CS1]] [] ; CHECK-NEXT: catchret from [[C]] to label [[BLOCK2]] @@ -1294,20 +1294,38 @@ lpad: ; A predecessor BB has both successors to the same BB, for simplicity we don't ; handle it, nothing should be changed. define void @test20(i1 %cond, i1 %cond2, ptr %p1, ptr %p2) { -; CHECK-LABEL: @test20( -; CHECK-NEXT: entry: -; CHECK-NEXT: br i1 [[COND:%.*]], label [[IF_THEN:%.*]], label [[IF_ELSE:%.*]] -; CHECK: if.then: -; CHECK-NEXT: [[V1:%.*]] = load i16, ptr [[P1:%.*]], align 2 -; CHECK-NEXT: [[DEC:%.*]] = add i16 [[V1]], -1 -; CHECK-NEXT: store i16 [[DEC]], ptr [[P1]], align 2 -; CHECK-NEXT: br label [[IF_END:%.*]] -; CHECK: if.else: -; CHECK-NEXT: br i1 [[COND2:%.*]], label [[IF_END]], label [[IF_END]] -; CHECK: if.end: -; CHECK-NEXT: [[V2:%.*]] = load i16, ptr [[P1]], align 2 -; CHECK-NEXT: store i16 [[V2]], ptr [[P2:%.*]], align 2 -; CHECK-NEXT: ret void +; MDEP-LABEL: @test20( +; MDEP-NEXT: entry: +; MDEP-NEXT: br i1 [[COND:%.*]], label [[IF_THEN:%.*]], label [[IF_ELSE:%.*]] +; MDEP: if.then: +; MDEP-NEXT: [[V1:%.*]] = load i16, ptr [[P1:%.*]], align 2 +; MDEP-NEXT: [[DEC:%.*]] = add i16 [[V1]], -1 +; MDEP-NEXT: store i16 [[DEC]], ptr [[P1]], align 2 +; MDEP-NEXT: br label [[IF_END:%.*]] +; MDEP: if.else: +; MDEP-NEXT: br i1 [[COND2:%.*]], label [[IF_ELSE_IF_END_CRIT_EDGE:%.*]], label [[IF_ELSE_IF_END_CRIT_EDGE]] +; MDEP: if.else.if.end_crit_edge: +; MDEP-NEXT: [[V2_PRE:%.*]] = load i16, ptr [[P1]], align 2 +; MDEP-NEXT: br label [[IF_END]] +; MDEP: if.end: +; MDEP-NEXT: [[V2:%.*]] = phi i16 [ [[V2_PRE]], [[IF_ELSE_IF_END_CRIT_EDGE]] ], [ [[DEC]], [[IF_THEN]] ] +; MDEP-NEXT: store i16 [[V2]], ptr [[P2:%.*]], align 2 +; MDEP-NEXT: ret void +; +; MSSA-LABEL: @test20( +; MSSA-NEXT: entry: +; MSSA-NEXT: br i1 [[COND:%.*]], label [[IF_THEN:%.*]], label [[IF_ELSE:%.*]] +; MSSA: if.then: +; MSSA-NEXT: [[V1:%.*]] = load i16, ptr [[P1:%.*]], align 2 +; MSSA-NEXT: [[DEC:%.*]] = add i16 [[V1]], -1 +; MSSA-NEXT: store i16 [[DEC]], ptr [[P1]], align 2 +; MSSA-NEXT: br label [[IF_END:%.*]] +; MSSA: if.else: +; MSSA-NEXT: br i1 [[COND2:%.*]], label [[IF_END]], label [[IF_END]] +; MSSA: if.end: +; MSSA-NEXT: [[V2:%.*]] = load i16, ptr [[P1]], align 2 +; MSSA-NEXT: store i16 [[V2]], ptr [[P2:%.*]], align 2 +; MSSA-NEXT: ret void ; entry: br i1 %cond, label %if.then, label %if.else @@ -1329,24 +1347,46 @@ if.end: ; More edges from the same BB to LoadBB. Don't change anything. define void @test21(i1 %cond, i32 %code, ptr %p1, ptr %p2) { -; CHECK-LABEL: @test21( -; CHECK-NEXT: entry: -; CHECK-NEXT: br i1 [[COND:%.*]], label [[IF_THEN:%.*]], label [[IF_ELSE:%.*]] -; CHECK: if.then: -; CHECK-NEXT: [[V1:%.*]] = load i16, ptr [[P1:%.*]], align 2 -; CHECK-NEXT: [[DEC:%.*]] = add i16 [[V1]], -1 -; CHECK-NEXT: store i16 [[DEC]], ptr [[P1]], align 2 -; CHECK-NEXT: br label [[IF_END:%.*]] -; CHECK: if.else: -; CHECK-NEXT: switch i32 [[CODE:%.*]], label [[IF_END]] [ -; CHECK-NEXT: i32 1, label [[IF_END]] -; CHECK-NEXT: i32 2, label [[IF_END]] -; CHECK-NEXT: i32 3, label [[IF_END]] -; CHECK-NEXT: ] -; CHECK: if.end: -; CHECK-NEXT: [[V2:%.*]] = load i16, ptr [[P1]], align 2 -; CHECK-NEXT: store i16 [[V2]], ptr [[P2:%.*]], align 2 -; CHECK-NEXT: ret void +; MDEP-LABEL: @test21( +; MDEP-NEXT: entry: +; MDEP-NEXT: br i1 [[COND:%.*]], label [[IF_THEN:%.*]], label [[IF_ELSE:%.*]] +; MDEP: if.then: +; MDEP-NEXT: [[V1:%.*]] = load i16, ptr [[P1:%.*]], align 2 +; MDEP-NEXT: [[DEC:%.*]] = add i16 [[V1]], -1 +; MDEP-NEXT: store i16 [[DEC]], ptr [[P1]], align 2 +; MDEP-NEXT: br label [[IF_END:%.*]] +; MDEP: if.else: +; MDEP-NEXT: switch i32 [[CODE:%.*]], label [[IF_ELSE_IF_END_CRIT_EDGE:%.*]] [ +; MDEP-NEXT: i32 1, label [[IF_ELSE_IF_END_CRIT_EDGE]] +; MDEP-NEXT: i32 2, label [[IF_ELSE_IF_END_CRIT_EDGE]] +; MDEP-NEXT: i32 3, label [[IF_ELSE_IF_END_CRIT_EDGE]] +; MDEP-NEXT: ] +; MDEP: if.else.if.end_crit_edge: +; MDEP-NEXT: [[V2_PRE:%.*]] = load i16, ptr [[P1]], align 2 +; MDEP-NEXT: br label [[IF_END]] +; MDEP: if.end: +; MDEP-NEXT: [[V2:%.*]] = phi i16 [ [[V2_PRE]], [[IF_ELSE_IF_END_CRIT_EDGE]] ], [ [[DEC]], [[IF_THEN]] ] +; MDEP-NEXT: store i16 [[V2]], ptr [[P2:%.*]], align 2 +; MDEP-NEXT: ret void +; +; MSSA-LABEL: @test21( +; MSSA-NEXT: entry: +; MSSA-NEXT: br i1 [[COND:%.*]], label [[IF_THEN:%.*]], label [[IF_ELSE:%.*]] +; MSSA: if.then: +; MSSA-NEXT: [[V1:%.*]] = load i16, ptr [[P1:%.*]], align 2 +; MSSA-NEXT: [[DEC:%.*]] = add i16 [[V1]], -1 +; MSSA-NEXT: store i16 [[DEC]], ptr [[P1]], align 2 +; MSSA-NEXT: br label [[IF_END:%.*]] +; MSSA: if.else: +; MSSA-NEXT: switch i32 [[CODE:%.*]], label [[IF_END]] [ +; MSSA-NEXT: i32 1, label [[IF_END]] +; MSSA-NEXT: i32 2, label [[IF_END]] +; MSSA-NEXT: i32 3, label [[IF_END]] +; MSSA-NEXT: ] +; MSSA: if.end: +; MSSA-NEXT: [[V2:%.*]] = load i16, ptr [[P1]], align 2 +; MSSA-NEXT: store i16 [[V2]], ptr [[P2:%.*]], align 2 +; MSSA-NEXT: ret void ; entry: br i1 %cond, label %if.then, label %if.else