66//
77// ===----------------------------------------------------------------------===//
88//
9- // TODO: This implements a function merge using function hash while tracking
10- // differences in Constants. This uses stable function hash to find potential
11- // merge candidates. The first codegen round collects stable function hashes,
12- // and determines the merge candidates that match the stable function hashes.
13- // The set of parameters pointing to different Constants are also computed
14- // during the stable function merge. The second codegen round uses this global
15- // function info to optimistically create a merged function in each module
16- // context to guarantee correct transformation. Similar to the global outliner,
17- // the linker's deduplication (ICF) folds the identical merged functions to save
18- // the final binary size.
9+ // This pass defines the implementation of a function merging mechanism
10+ // that utilizes a stable function hash to track differences in constants and
11+ // create potential merge candidates. The process involves two rounds:
12+ // 1. The first round collects stable function hashes and identifies merge
13+ // candidates with matching hashes. It also computes the set of parameters
14+ // that point to different constants during the stable function merge.
15+ // 2. The second round leverages this collected global function information to
16+ // optimistically create a merged function in each module context, ensuring
17+ // correct transformation.
18+ // Similar to the global outliner, this approach uses the linker's deduplication
19+ // (ICF) to fold identical merged functions, thereby reducing the final binary
20+ // size. The work is inspired by the concepts discussed in the following paper:
21+ // https://dl.acm.org/doi/pdf/10.1145/3652032.3657575.
1922//
2023// ===----------------------------------------------------------------------===//
2124
2225#include " llvm/Transforms/IPO/GlobalMergeFunctions.h"
2326#include " llvm/ADT/Statistic.h"
2427#include " llvm/Analysis/ModuleSummaryAnalysis.h"
2528#include " llvm/CGData/CodeGenData.h"
26- #include " llvm/CGData/StableFunctionMap.h"
27- #include " llvm/CodeGen/MachineStableHash.h"
28- #include " llvm/CodeGen/Passes.h"
2929#include " llvm/IR/IRBuilder.h"
3030#include " llvm/IR/StructuralHash.h"
3131#include " llvm/InitializePasses.h"
@@ -59,7 +59,7 @@ STATISTIC(NumAnalyzedModues, "Number of modules that are analyzed");
5959STATISTIC (NumAnalyzedFunctions, " Number of functions that are analyzed" );
6060STATISTIC (NumEligibleFunctions, " Number of functions that are eligible" );
6161
62- // / Returns true if the \opIdx operand of \p CI is the callee operand.
62+ /// Returns true if the \p OpIdx operand of \p CI is the callee operand.
6363static bool isCalleeOperand (const CallBase *CI, unsigned OpIdx) {
6464 return &CI->getCalledOperandUse () == &CI->getOperandUse (OpIdx);
6565}
@@ -123,22 +123,19 @@ bool isEligibleFunction(Function *F) {
123123 if (F->hasFnAttribute (llvm::Attribute::NoMerge))
124124 return false ;
125125
126- if (F->hasAvailableExternallyLinkage ()) {
126+ if (F->hasAvailableExternallyLinkage ())
127127 return false ;
128- }
129128
130- if (F->getFunctionType ()->isVarArg ()) {
129+ if (F->getFunctionType ()->isVarArg ())
131130 return false ;
132- }
133131
134132 if (F->getCallingConv () == CallingConv::SwiftTail)
135133 return false ;
136134
137- // if function contains callsites with musttail, if we merge
135+ // If the function contains callsites with musttail and we merge it,
138136 // the merged function will keep the musttail callsite, but
139137 // the number of parameters can change, so the parameter count
140138 // of the callsite will no longer match that of the function itself.
141- // if (IgnoreMusttailFunction) {
142139 for (const BasicBlock &BB : *F) {
143140 for (const Instruction &I : BB) {
144141 const auto *CB = dyn_cast<CallBase>(&I);
@@ -180,7 +177,6 @@ static bool ignoreOp(const Instruction *I, unsigned OpIdx) {
180177 return true ;
181178}
182179
183- // copy from merge functions.cpp
184180static Value *createCast (IRBuilder<> &Builder, Value *V, Type *DestTy) {
185181 Type *SrcTy = V->getType ();
186182 if (SrcTy->isStructTy ()) {
@@ -229,7 +225,8 @@ void GlobalMergeFunc::analyze(Module &M) {
229225
230226 auto FI = llvm::StructuralHashWithDifferences (Func, ignoreOp);
231227
232- // Convert the map to a vector for a serialization-friendly format.
228+ // Convert the operand map to a vector for a serialization-friendly
229+ // format.
233230 IndexOperandHashVecType IndexOperandHashes;
234231 for (auto &Pair : *FI.IndexOperandHashMap )
235232 IndexOperandHashes.emplace_back (Pair);
@@ -539,7 +536,7 @@ bool GlobalMergeFunc::merge(Module &M, const StableFunctionMap *FunctionMap) {
539536 // This module check is not strictly necessary as the functions can move
540537 // around. We just want to avoid merging functions from different
541538 // modules than the first one in the function map, as they may not end up
542- // with not being ICFed.
539+ // being ICFed by the linker.
543540 if (MergedModId != *FunctionMap->getNameForId (SF->ModuleNameId )) {
544541 ++NumMismatchedModuleIdGlobalMergeFunction;
545542 continue ;
@@ -560,12 +557,12 @@ bool GlobalMergeFunc::merge(Module &M, const StableFunctionMap *FunctionMap) {
560557 dbgs () << " [GlobalMergeFunc] Merging function count " << FuncMergeInfoSize
561558 << " in " << ModId << " \n " ;
562559 });
560+
563561 for (auto &FMI : FuncMergeInfos) {
564562 Changed = true ;
565563
566564 // We've already validated all locations of constant operands pointed to by
567- // the parameters. Just use the first one to bookkeep the original
568- // constants for each parameter
565+ // the parameters. Populate parameters pointing to the original constants.
569566 SmallVector<Constant *> Params;
570567 SmallVector<Type *> ParamTypes;
571568 for (auto &ParamLocs : ParamLocsVec) {
@@ -577,8 +574,7 @@ bool GlobalMergeFunc::merge(Module &M, const StableFunctionMap *FunctionMap) {
577574 ParamTypes.push_back (Opnd->getType ());
578575 }
579576
580- // Create a merged function derived from the first function in the current
581- // module context.
577+ // Create a merged function derived from the current function.
582578 Function *MergedFunc =
583579 createMergedFunction (FMI, ParamTypes, ParamLocsVec);
584580
@@ -589,7 +585,8 @@ bool GlobalMergeFunc::merge(Module &M, const StableFunctionMap *FunctionMap) {
589585 MergedFunc->dump ();
590586 });
591587
592- // Create a thunk to the merged function.
588+ // Transform the current function into a thunk that calls the merged
589+ // function.
593590 createThunk (FMI, Params, MergedFunc);
594591 LLVM_DEBUG ({
595592 dbgs () << " [GlobalMergeFunc] Thunk generated: \n " ;
0 commit comments