Skip to content

Commit 6260ea8

Browse files
spupyrev authored and memfrob committed
using offsets for CG
Summary: Arc->AvgOffset can be used for function/block ordering to distinguish between calls from the beginning of a function and calls from the end of the function. This makes a difference for large functions. (cherry picked from FBD6094221)
1 parent e55381d commit 6260ea8

File tree

5 files changed

+69
-54
lines changed

5 files changed

+69
-54
lines changed

bolt/Passes/BinaryFunctionCallGraph.cpp

Lines changed: 48 additions & 28 deletions
Original file line numberDiff line numberDiff line change
@@ -89,6 +89,13 @@ BinaryFunctionCallGraph buildCallGraph(BinaryContext &BC,
8989
BinaryFunctionCallGraph Cg;
9090
static constexpr auto COUNT_NO_PROFILE = BinaryBasicBlock::COUNT_NO_PROFILE;
9191

92+
// Compute function size
93+
auto functionSize = [&](const BinaryFunction *Function) {
94+
return UseFunctionHotSize && Function->isSplit()
95+
? Function->estimateHotSize(UseSplitHotSize)
96+
: Function->estimateSize();
97+
};
98+
9299
// Add call graph nodes.
93100
auto lookupNode = [&](BinaryFunction *Function) {
94101
const auto Id = Cg.maybeGetNodeId(Function);
@@ -97,9 +104,7 @@ BinaryFunctionCallGraph buildCallGraph(BinaryContext &BC,
97104
// because emitFunctions will emit the hot part first in the order that is
98105
// computed by ReorderFunctions. The cold part will be emitted with the
99106
// rest of the cold functions and code.
100-
const auto Size = UseFunctionHotSize && Function->isSplit()
101-
? Function->estimateHotSize(UseSplitHotSize)
102-
: Function->estimateSize();
107+
const auto Size = functionSize(Function);
103108
// NOTE: for functions without a profile, we set the number of samples
104109
// to zero. This will keep these functions from appearing in the hot
105110
// section. This is a little weird because we wouldn't be trying to
@@ -125,14 +130,14 @@ BinaryFunctionCallGraph buildCallGraph(BinaryContext &BC,
125130
for (auto &It : BFs) {
126131
auto *Function = &It.second;
127132

128-
if(Filter(*Function)) {
133+
if (Filter(*Function)) {
129134
continue;
130135
}
131136

132137
const auto *BranchData = Function->getBranchData();
133138
const auto SrcId = lookupNode(Function);
134-
uint64_t Offset = Function->getAddress();
135-
uint64_t LastInstSize = 0;
139+
// Offset of the current basic block from the beginning of the function
140+
uint64_t Offset = 0;
136141

137142
auto recordCall = [&](const MCSymbol *DestSymbol, const uint64_t Count) {
138143
if (auto *DstFunc =
@@ -145,18 +150,19 @@ BinaryFunctionCallGraph buildCallGraph(BinaryContext &BC,
145150
return false;
146151
}
147152
const auto DstId = lookupNode(DstFunc);
148-
const auto AvgDelta = UseEdgeCounts ? 0 : Offset - DstFunc->getAddress();
149153
const bool IsValidCount = Count != COUNT_NO_PROFILE;
150154
const auto AdjCount = UseEdgeCounts && IsValidCount ? Count : 1;
151-
if (!IsValidCount) ++NoProfileCallsites;
152-
Cg.incArcWeight(SrcId, DstId, AdjCount, AvgDelta);
155+
if (!IsValidCount)
156+
++NoProfileCallsites;
157+
Cg.incArcWeight(SrcId, DstId, AdjCount, Offset);
153158
DEBUG(
154159
if (opts::Verbosity > 1) {
155160
dbgs() << "BOLT-DEBUG: buildCallGraph: call " << *Function
156161
<< " -> " << *DstFunc << " @ " << Offset << "\n";
157162
});
158163
return true;
159164
}
165+
160166
return false;
161167
};
162168

@@ -209,8 +215,14 @@ BinaryFunctionCallGraph buildCallGraph(BinaryContext &BC,
209215
DEBUG(dbgs() << "BOLT-DEBUG: buildCallGraph: Falling back to perf data"
210216
<< " for " << *Function << "\n");
211217
++NumFallbacks;
218+
const auto Size = functionSize(Function);
212219
for (const auto &BI : BranchData->Data) {
213-
Offset = Function->getAddress() + BI.From.Offset;
220+
Offset = BI.From.Offset;
221+
// The computed offset may exceed the hot part of the function; hence,
222+
// bound it the size
223+
if (Offset > Size)
224+
Offset = Size;
225+
214226
const auto CI = getCallInfoFromBranchData(BI, true);
215227
if (!CI.first && CI.second == COUNT_NO_PROFILE) // probably a branch
216228
continue;
@@ -225,30 +237,38 @@ BinaryFunctionCallGraph buildCallGraph(BinaryContext &BC,
225237
if (BB->isCold() && !IncludeColdCalls)
226238
continue;
227239

228-
for (auto &Inst : *BB) {
229-
if (!UseEdgeCounts) {
230-
Offset += LastInstSize;
231-
LastInstSize = BC.computeCodeSize(&Inst, &Inst + 1);
232-
}
240+
// Determine whether the block is included in Function's (hot) size
241+
// See BinaryFunction::estimateHotSize
242+
bool BBIncludedInFunctionSize = false;
243+
if (UseFunctionHotSize && Function->isSplit()) {
244+
if (UseSplitHotSize)
245+
BBIncludedInFunctionSize = !BB->isCold();
246+
else
247+
BBIncludedInFunctionSize = BB->getKnownExecutionCount() != 0;
248+
} else {
249+
BBIncludedInFunctionSize = true;
250+
}
233251

252+
for (auto &Inst : *BB) {
234253
// Find call instructions and extract target symbols from each one.
235-
if (!BC.MIA->isCall(Inst))
236-
continue;
237-
238-
const auto CallInfo = getCallInfo(BB, Inst);
254+
if (BC.MIA->isCall(Inst)) {
255+
const auto CallInfo = getCallInfo(BB, Inst);
239256

240-
if (CallInfo.empty()) {
241-
++TotalCallsites;
242-
++NotProcessed;
243-
continue;
244-
}
245-
246-
for (const auto &CI : CallInfo) {
247-
++TotalCallsites;
248-
if (!recordCall(CI.first, CI.second)) {
257+
if (!CallInfo.empty()) {
258+
for (const auto &CI : CallInfo) {
259+
++TotalCallsites;
260+
if (!recordCall(CI.first, CI.second))
261+
++NotProcessed;
262+
}
263+
} else {
264+
++TotalCallsites;
249265
++NotProcessed;
250266
}
251267
}
268+
// Increase Offset if needed
269+
if (BBIncludedInFunctionSize) {
270+
Offset += BC.computeCodeSize(&Inst, &Inst + 1);
271+
}
252272
}
253273
}
254274
}

bolt/Passes/BinaryFunctionCallGraph.h

Lines changed: 2 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -65,8 +65,8 @@ inline bool NoFilter(const BinaryFunction &) { return false; }
6565
/// graph, otherwise they are ignored.
6666
/// UseFunctionHotSize controls whether the hot size of a function is used when
6767
/// filling in the Size attribute of new Nodes.
68-
/// UseEdgeCounts is used to control if the AvgCallOffset attribute on Arcs is
69-
/// computed using the offsets of call instructions.
68+
/// UseEdgeCounts is used to control if the Weight attribute on Arcs is computed
69+
/// using the number of calls.
7070
BinaryFunctionCallGraph buildCallGraph(BinaryContext &BC,
7171
std::map<uint64_t, BinaryFunction> &BFs,
7272
CgFilterFunction Filter = NoFilter,

bolt/Passes/CallGraph.cpp

Lines changed: 16 additions & 21 deletions
Original file line numberDiff line numberDiff line change
@@ -44,7 +44,7 @@ inline size_t hash_int64(int64_t k) {
4444
return hash_int64_fallback(k);
4545
#endif
4646
}
47-
47+
4848
inline size_t hash_int64_pair(int64_t k1, int64_t k2) {
4949
#if defined(USE_SSECRC) && defined(__SSE4_2__)
5050
// crc32 is commutative, so we need to perturb k1 so that (k1, k2) hashes
@@ -56,7 +56,7 @@ inline size_t hash_int64_pair(int64_t k1, int64_t k2) {
5656
return (hash_int64(k1) << 1) ^ hash_int64(k2);
5757
#endif
5858
}
59-
59+
6060
}
6161

6262
namespace llvm {
@@ -79,36 +79,31 @@ CallGraph::NodeId CallGraph::addNode(uint32_t Size, uint64_t Samples) {
7979

8080
const CallGraph::Arc &CallGraph::incArcWeight(NodeId Src, NodeId Dst, double W,
8181
double Offset) {
82+
assert(Offset <= size(Src) && "Call offset exceeds function size");
83+
8284
auto Res = Arcs.emplace(Src, Dst, W);
8385
if (!Res.second) {
8486
Res.first->Weight += W;
87+
Res.first->AvgCallOffset += Offset * W;
8588
return *Res.first;
8689
}
87-
Res.first->AvgCallOffset += Offset;
90+
Res.first->AvgCallOffset = Offset * W;
8891
Nodes[Src].Succs.push_back(Dst);
8992
Nodes[Dst].Preds.push_back(Src);
9093
return *Res.first;
9194
}
9295

93-
void CallGraph::normalizeArcWeights(bool UseEdgeCounts) {
94-
// Normalize arc weights.
95-
if (!UseEdgeCounts) {
96-
for (NodeId FuncId = 0; FuncId < numNodes(); ++FuncId) {
97-
auto& Func = getNode(FuncId);
98-
for (auto Caller : Func.predecessors()) {
99-
auto Arc = findArc(Caller, FuncId);
100-
Arc->NormalizedWeight = Arc->weight() / Func.samples();
96+
void CallGraph::normalizeArcWeights() {
97+
// Normalize arc weights
98+
for (NodeId FuncId = 0; FuncId < numNodes(); ++FuncId) {
99+
auto& Func = getNode(FuncId);
100+
for (auto Caller : Func.predecessors()) {
101+
auto Arc = findArc(Caller, FuncId);
102+
Arc->NormalizedWeight = Arc->weight() / Func.samples();
103+
if (Arc->weight() > 0)
101104
Arc->AvgCallOffset /= Arc->weight();
102-
assert(Arc->AvgCallOffset < size(Caller));
103-
}
104-
}
105-
} else {
106-
for (NodeId FuncId = 0; FuncId < numNodes(); ++FuncId) {
107-
auto &Func = getNode(FuncId);
108-
for (auto Caller : Func.predecessors()) {
109-
auto Arc = findArc(Caller, FuncId);
110-
Arc->NormalizedWeight = Arc->weight() / Func.samples();
111-
}
105+
assert(Arc->AvgCallOffset <= size(Caller) &&
106+
"Avg call offset exceeds function size");
112107
}
113108
}
114109
}

bolt/Passes/CallGraph.h

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -153,7 +153,7 @@ class CallGraph {
153153
return double(Arcs.size()) / (Nodes.size()*Nodes.size());
154154
}
155155

156-
void normalizeArcWeights(bool UseEdgeCounts);
156+
void normalizeArcWeights();
157157

158158
template <typename L>
159159
void printDot(char* fileName, L getLabel) const;

bolt/Passes/ReorderFunctions.cpp

Lines changed: 2 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -134,7 +134,7 @@ namespace bolt {
134134

135135
using NodeId = CallGraph::NodeId;
136136
using Arc = CallGraph::Arc;
137-
using Node = CallGraph::Node;
137+
using Node = CallGraph::Node;
138138

139139
void ReorderFunctions::reorder(std::vector<Cluster> &&Clusters,
140140
std::map<uint64_t, BinaryFunction> &BFs) {
@@ -310,7 +310,7 @@ void ReorderFunctions::runOnFunctions(BinaryContext &BC,
310310
opts::CgUseSplitHotSize,
311311
opts::UseEdgeCounts,
312312
opts::CgIgnoreRecursiveCalls);
313-
Cg.normalizeArcWeights(opts::UseEdgeCounts);
313+
Cg.normalizeArcWeights();
314314
}
315315

316316
std::vector<Cluster> Clusters;

0 commit comments

Comments (0)