Skip to content

Commit 78b35e2

Browse files
wlei-llvm authored and tstellar committed
[CSSPGO][llvm-profgen] Pseudo probe based CS profile generation
This change implements the profile generation infrastructure for pseudo probes in llvm-profgen. During virtual unwinding, the raw profile is extracted into a range counter and a branch counter, which are aggregated into a sample counter map indexed by the call stack context. This change introduces the last step and produces the final profile. Specifically, function body samples are recorded by going through each probe within a range, and callsite target samples are recorded by extracting the callsite probe from the branch's source. Please refer to https://groups.google.com/g/llvm-dev/c/1p1rdYbL93s and https://reviews.llvm.org/D89707 for more context about CSSPGO and llvm-profgen. **Implementation** - Extended `PseudoProbeCSProfileGenerator` for pseudo probe based profile generation. - `populateBodySamplesWithProbes`, which reads the range counter, is responsible for recording function body samples and inferring the caller's body samples. - `populateBoundarySamplesWithProbes`, which reads the branch counter, is responsible for recording call site target samples. - Each sample is recorded with its calling context (named `ContextId`). Note that the probe based context key doesn't include the leaf frame probe info, so the `ContextId` string is created from two parts: one from the concatenation of the probe stack strings and the other from the leaf frame probe. - Added regression tests. Test Plan: ninja & ninja check-llvm Differential Revision: https://reviews.llvm.org/D92998
1 parent a7629a2 commit 78b35e2

File tree

8 files changed

+307
-29
lines changed

8 files changed

+307
-29
lines changed

llvm/test/tools/llvm-profgen/inline-cs-pseudoprobe.test

Lines changed: 17 additions & 0 deletions
Original file line number | Diff line number | Diff line change
@@ -1,4 +1,21 @@
11
; RUN: llvm-profgen --perfscript=%S/Inputs/inline-cs-pseudoprobe.perfscript --binary=%S/Inputs/inline-cs-pseudoprobe.perfbin --output=%t --show-unwinder-output | FileCheck %s --check-prefix=CHECK-UNWINDER
2+
; RUN: FileCheck %s --input-file %t
3+
4+
; CHECK: [main:2 @ foo]:74:0
5+
; CHECK-NEXT: 2: 15
6+
; CHECK-NEXT: 3: 15
7+
; CHECK-NEXT: 4: 14
8+
; CHECK-NEXT: 5: 1
9+
; CHECK-NEXT: 6: 15
10+
; CHECK-NEXT: 8: 14 bar:14
11+
; CHECK-NEXT: !CFGChecksum: 138950591924
12+
; CHECK-NEXT:[main:2 @ foo:8 @ bar]:56:14
13+
; CHECK-NEXT: 1: 14
14+
; CHECK-NEXT: 2: 14
15+
; CHECK-NEXT: 3: 14
16+
; CHECK-NEXT: 4: 14
17+
; CHECK-NEXT: !CFGChecksum: 72617220756
18+
219

320
; CHECK-UNWINDER: Binary(inline-cs-pseudoprobe.perfbin)'s Range Counter:
421
; CHECK-UNWINDER-EMPTY:

llvm/test/tools/llvm-profgen/noinline-cs-pseudoprobe.test

Lines changed: 16 additions & 0 deletions
Original file line number | Diff line number | Diff line change
@@ -1,4 +1,20 @@
11
; RUN: llvm-profgen --perfscript=%S/Inputs/noinline-cs-pseudoprobe.perfscript --binary=%S/Inputs/noinline-cs-pseudoprobe.perfbin --output=%t --show-unwinder-output | FileCheck %s --check-prefix=CHECK-UNWINDER
2+
; RUN: FileCheck %s --input-file %t
3+
4+
; CHECK: [main:2 @ foo]:75:0
5+
; CHECK-NEXT: 2: 15
6+
; CHECK-NEXT: 3: 15
7+
; CHECK-NEXT: 4: 15
8+
; CHECK-NEXT: 6: 15
9+
; CHECK-NEXT: 8: 15 bar:15
10+
; CHECK-NEXT: !CFGChecksum: 138950591924
11+
; CHECK-NEXT:[main:2 @ foo:8 @ bar]:60:15
12+
; CHECK-NEXT: 1: 15
13+
; CHECK-NEXT: 2: 15
14+
; CHECK-NEXT: 3: 15
15+
; CHECK-NEXT: 4: 15
16+
; CHECK-NEXT: !CFGChecksum: 72617220756
17+
218

319
; CHECK-UNWINDER: Binary(noinline-cs-pseudoprobe.perfbin)'s Range Counter:
420
; CHECK-UNWINDER-NEXT: main:2

llvm/tools/llvm-profgen/PerfReader.cpp

Lines changed: 0 additions & 4 deletions
Original file line number | Diff line number | Diff line change
@@ -567,11 +567,7 @@ void PerfReader::checkAndSetPerfType(
567567
}
568568

569569
if (HasHybridPerf) {
570-
// Set up ProfileIsCS to enable context-sensitive functionalities
571-
// in SampleProf
572-
FunctionSamples::ProfileIsCS = true;
573570
PerfType = PERF_LBR_STACK;
574-
575571
} else {
576572
// TODO: Support other type of perf script
577573
PerfType = PERF_INVILID;

llvm/tools/llvm-profgen/ProfileGenerator.cpp

Lines changed: 196 additions & 2 deletions
Original file line number | Diff line number | Diff line change
@@ -67,7 +67,7 @@ void ProfileGenerator::findDisjointRanges(RangeSample &DisjointRanges,
6767
/*
6868
Regions may overlap with each other. Using the boundary info, find all
6969
disjoint ranges and their sample count. BoundaryPoint contains the count
70-
mutiple samples begin/end at this points.
70+
multiple samples begin/end at this points.
7171
7272
|<--100-->| Sample1
7373
|<------200------>| Sample2
@@ -264,9 +264,12 @@ static FrameLocation getCallerContext(StringRef CalleeContext,
264264
StringRef CallerContext = CalleeContext.rsplit(" @ ").first;
265265
CallerNameWithContext = CallerContext.rsplit(':').first;
266266
auto ContextSplit = CallerContext.rsplit(" @ ");
267+
StringRef CallerFrameStr = ContextSplit.second.size() == 0
268+
? ContextSplit.first
269+
: ContextSplit.second;
267270
FrameLocation LeafFrameLoc = {"", {0, 0}};
268271
StringRef Funcname;
269-
SampleContext::decodeContextString(ContextSplit.second, Funcname,
272+
SampleContext::decodeContextString(CallerFrameStr, Funcname,
270273
LeafFrameLoc.second);
271274
LeafFrameLoc.first = Funcname.str();
272275
return LeafFrameLoc;
@@ -316,5 +319,196 @@ void CSProfileGenerator::populateInferredFunctionSamples() {
316319
}
317320
}
318321

322+
// Helper function to extract context prefix
323+
// PrefixContextId is the context id string except for the leaf probe's
324+
// context, the final ContextId will be:
325+
// ContextId = PrefixContextId + LeafContextId;
326+
// Remind that the string in ContextStrStack is in callee-caller order
327+
// So process the string vector reversely
328+
static std::string
329+
extractPrefixContextId(const SmallVector<const PseudoProbe *, 16> &Probes,
330+
ProfiledBinary *Binary) {
331+
SmallVector<std::string, 16> ContextStrStack;
332+
for (const auto *P : Probes) {
333+
Binary->getInlineContextForProbe(P, ContextStrStack, true);
334+
}
335+
std::ostringstream OContextStr;
336+
for (auto &CxtStr : ContextStrStack) {
337+
if (OContextStr.str().size())
338+
OContextStr << " @ ";
339+
OContextStr << CxtStr;
340+
}
341+
return OContextStr.str();
342+
}
343+
344+
void PseudoProbeCSProfileGenerator::generateProfile() {
345+
// Enable CS and pseudo probe functionalities in SampleProf
346+
FunctionSamples::ProfileIsCS = true;
347+
FunctionSamples::ProfileIsProbeBased = true;
348+
for (const auto &BI : BinarySampleCounters) {
349+
ProfiledBinary *Binary = BI.first;
350+
for (const auto &CI : BI.second) {
351+
const ProbeBasedCtxKey *CtxKey =
352+
dyn_cast<ProbeBasedCtxKey>(CI.first.getPtr());
353+
std::string PrefixContextId =
354+
extractPrefixContextId(CtxKey->Probes, Binary);
355+
// Fill in function body samples from probes, also infer caller's samples
356+
// from callee's probe
357+
populateBodySamplesWithProbes(CI.second.RangeCounter, PrefixContextId,
358+
Binary);
359+
// Fill in boundary samples for a call probe
360+
populateBoundarySamplesWithProbes(CI.second.BranchCounter,
361+
PrefixContextId, Binary);
362+
}
363+
}
364+
}
365+
366+
void PseudoProbeCSProfileGenerator::extractProbesFromRange(
367+
const RangeSample &RangeCounter, ProbeCounterMap &ProbeCounter,
368+
ProfiledBinary *Binary) {
369+
RangeSample Ranges;
370+
findDisjointRanges(Ranges, RangeCounter);
371+
for (const auto &Range : Ranges) {
372+
uint64_t RangeBegin = Binary->offsetToVirtualAddr(Range.first.first);
373+
uint64_t RangeEnd = Binary->offsetToVirtualAddr(Range.first.second);
374+
uint64_t Count = Range.second;
375+
// Disjoint ranges have introduce zero-filled gap that
376+
// doesn't belong to current context, filter them out.
377+
if (Count == 0)
378+
continue;
379+
380+
InstructionPointer IP(Binary, RangeBegin, true);
381+
382+
// Disjoint ranges may have range in the middle of two instr,
383+
// e.g. If Instr1 at Addr1, and Instr2 at Addr2, disjoint range
384+
// can be Addr1+1 to Addr2-1. We should ignore such range.
385+
if (IP.Address > RangeEnd)
386+
continue;
387+
388+
while (IP.Address <= RangeEnd) {
389+
const AddressProbesMap &Address2ProbesMap =
390+
Binary->getAddress2ProbesMap();
391+
auto It = Address2ProbesMap.find(IP.Address);
392+
if (It != Address2ProbesMap.end()) {
393+
for (const auto &Probe : It->second) {
394+
if (!Probe.isBlock())
395+
continue;
396+
ProbeCounter[&Probe] += Count;
397+
}
398+
}
399+
400+
IP.advance();
401+
}
402+
}
403+
}
404+
405+
void PseudoProbeCSProfileGenerator::populateBodySamplesWithProbes(
406+
const RangeSample &RangeCounter, StringRef PrefixContextId,
407+
ProfiledBinary *Binary) {
408+
ProbeCounterMap ProbeCounter;
409+
// Extract the top frame probes by looking up each address among the range in
410+
// the Address2ProbeMap
411+
extractProbesFromRange(RangeCounter, ProbeCounter, Binary);
412+
for (auto PI : ProbeCounter) {
413+
const PseudoProbe *Probe = PI.first;
414+
uint64_t Count = PI.second;
415+
FunctionSamples &FunctionProfile =
416+
getFunctionProfileForLeafProbe(PrefixContextId, Probe, Binary);
417+
418+
FunctionProfile.addBodySamples(Probe->Index, 0, Count);
419+
FunctionProfile.addTotalSamples(Count);
420+
if (Probe->isEntry()) {
421+
FunctionProfile.addHeadSamples(Count);
422+
// Look up for the caller's function profile
423+
const auto *InlinerDesc = Binary->getInlinerDescForProbe(Probe);
424+
if (InlinerDesc != nullptr) {
425+
// Since the context id will be compressed, we have to use callee's
426+
// context id to infer caller's context id to ensure they share the
427+
// same context prefix.
428+
StringRef CalleeContextId =
429+
FunctionProfile.getContext().getNameWithContext(true);
430+
StringRef CallerContextId;
431+
FrameLocation &&CallerLeafFrameLoc =
432+
getCallerContext(CalleeContextId, CallerContextId);
433+
uint64_t CallerIndex = CallerLeafFrameLoc.second.LineOffset;
434+
assert(CallerIndex &&
435+
"Inferred caller's location index shouldn't be zero!");
436+
FunctionSamples &CallerProfile =
437+
getFunctionProfileForContext(CallerContextId);
438+
CallerProfile.setFunctionHash(InlinerDesc->FuncHash);
439+
CallerProfile.addBodySamples(CallerIndex, 0, Count);
440+
CallerProfile.addTotalSamples(Count);
441+
CallerProfile.addCalledTargetSamples(CallerIndex, 0,
442+
FunctionProfile.getName(), Count);
443+
}
444+
}
445+
}
446+
}
447+
448+
void PseudoProbeCSProfileGenerator::populateBoundarySamplesWithProbes(
449+
const BranchSample &BranchCounter, StringRef PrefixContextId,
450+
ProfiledBinary *Binary) {
451+
for (auto BI : BranchCounter) {
452+
uint64_t SourceOffset = BI.first.first;
453+
uint64_t TargetOffset = BI.first.second;
454+
uint64_t Count = BI.second;
455+
uint64_t SourceAddress = Binary->offsetToVirtualAddr(SourceOffset);
456+
const PseudoProbe *CallProbe = Binary->getCallProbeForAddr(SourceAddress);
457+
if (CallProbe == nullptr)
458+
continue;
459+
FunctionSamples &FunctionProfile =
460+
getFunctionProfileForLeafProbe(PrefixContextId, CallProbe, Binary);
461+
FunctionProfile.addBodySamples(CallProbe->Index, 0, Count);
462+
FunctionProfile.addTotalSamples(Count);
463+
StringRef CalleeName = FunctionSamples::getCanonicalFnName(
464+
Binary->getFuncFromStartOffset(TargetOffset));
465+
if (CalleeName.size() == 0)
466+
continue;
467+
FunctionProfile.addCalledTargetSamples(CallProbe->Index, 0, CalleeName,
468+
Count);
469+
}
470+
}
471+
472+
FunctionSamples &PseudoProbeCSProfileGenerator::getFunctionProfileForLeafProbe(
473+
StringRef PrefixContextId, SmallVector<std::string, 16> &LeafInlinedContext,
474+
const PseudoProbeFuncDesc *LeafFuncDesc) {
475+
assert(LeafInlinedContext.size() &&
476+
"Profile context must have the leaf frame");
477+
std::ostringstream OContextStr;
478+
OContextStr << PrefixContextId.str();
479+
480+
for (uint32_t I = 0; I < LeafInlinedContext.size() - 1; I++) {
481+
if (OContextStr.str().size())
482+
OContextStr << " @ ";
483+
OContextStr << LeafInlinedContext[I];
484+
}
485+
// For leaf inlined context with the top frame, we should strip off the top
486+
// frame's probe id, like:
487+
// Inlined stack: [foo:1, bar:2], the ContextId will be "foo:1 @ bar"
488+
if (OContextStr.str().size())
489+
OContextStr << " @ ";
490+
StringRef LeafLoc = LeafInlinedContext.back();
491+
OContextStr << LeafLoc.split(":").first.str();
492+
493+
FunctionSamples &FunctionProile =
494+
getFunctionProfileForContext(OContextStr.str());
495+
FunctionProile.setFunctionHash(LeafFuncDesc->FuncHash);
496+
return FunctionProile;
497+
}
498+
499+
FunctionSamples &PseudoProbeCSProfileGenerator::getFunctionProfileForLeafProbe(
500+
StringRef PrefixContextId, const PseudoProbe *LeafProbe,
501+
ProfiledBinary *Binary) {
502+
SmallVector<std::string, 16> LeafInlinedContext;
503+
Binary->getInlineContextForProbe(LeafProbe, LeafInlinedContext);
504+
// Note that the context from probe doesn't include leaf frame,
505+
// hence we need to retrieve and append the leaf frame.
506+
const auto *FuncDesc = Binary->getFuncDescForGUID(LeafProbe->GUID);
507+
LeafInlinedContext.emplace_back(FuncDesc->FuncName + ":" +
508+
Twine(LeafProbe->Index).str());
509+
return getFunctionProfileForLeafProbe(PrefixContextId, LeafInlinedContext,
510+
FuncDesc);
511+
}
512+
319513
} // end namespace sampleprof
320514
} // end namespace llvm

llvm/tools/llvm-profgen/ProfileGenerator.h

Lines changed: 34 additions & 7 deletions
Original file line number | Diff line number | Diff line change
@@ -25,7 +25,7 @@ class ProfileGenerator {
2525
ProfileGenerator(){};
2626
virtual ~ProfileGenerator() = default;
2727
static std::unique_ptr<ProfileGenerator>
28-
create(const BinarySampleCounterMap &SampleCounters,
28+
create(const BinarySampleCounterMap &BinarySampleCounters,
2929
enum PerfScriptType SampleType);
3030
virtual void generateProfile() = 0;
3131

@@ -50,7 +50,6 @@ class ProfileGenerator {
5050
*/
5151
void findDisjointRanges(RangeSample &DisjointRanges,
5252
const RangeSample &Ranges);
53-
5453
// Used by SampleProfileWriter
5554
StringMap<FunctionSamples> ProfileMap;
5655
};
@@ -65,6 +64,8 @@ class CSProfileGenerator : public ProfileGenerator {
6564

6665
public:
6766
void generateProfile() override {
67+
// Enable context-sensitive functionalities in SampleProf
68+
FunctionSamples::ProfileIsCS = true;
6869
for (const auto &BI : BinarySampleCounters) {
6970
ProfiledBinary *Binary = BI.first;
7071
for (const auto &CI : BI.second) {
@@ -90,14 +91,16 @@ class CSProfileGenerator : public ProfileGenerator {
9091
populateInferredFunctionSamples();
9192
}
9293

94+
protected:
95+
// Lookup or create FunctionSamples for the context
96+
FunctionSamples &getFunctionProfileForContext(StringRef ContextId);
97+
9398
private:
9499
// Helper function for updating body sample for a leaf location in
95100
// FunctionProfile
96101
void updateBodySamplesforFunctionProfile(FunctionSamples &FunctionProfile,
97102
const FrameLocation &LeafLoc,
98103
uint64_t Count);
99-
// Lookup or create FunctionSamples for the context
100-
FunctionSamples &getFunctionProfileForContext(StringRef ContextId);
101104
void populateFunctionBodySamples(FunctionSamples &FunctionProfile,
102105
const RangeSample &RangeCounters,
103106
ProfiledBinary *Binary);
@@ -108,14 +111,38 @@ class CSProfileGenerator : public ProfileGenerator {
108111
void populateInferredFunctionSamples();
109112
};
110113

114+
using ProbeCounterMap = std::unordered_map<const PseudoProbe *, uint64_t>;
115+
111116
class PseudoProbeCSProfileGenerator : public CSProfileGenerator {
112117

113118
public:
114119
PseudoProbeCSProfileGenerator(const BinarySampleCounterMap &Counters)
115120
: CSProfileGenerator(Counters) {}
116-
void generateProfile() override {
117-
// TODO
118-
}
121+
void generateProfile() override;
122+
123+
private:
124+
// Go through each address from range to extract the top frame probe by
125+
// looking up in the Address2ProbeMap
126+
void extractProbesFromRange(const RangeSample &RangeCounter,
127+
ProbeCounterMap &ProbeCounter,
128+
ProfiledBinary *Binary);
129+
// Fill in function body samples from probes
130+
void populateBodySamplesWithProbes(const RangeSample &RangeCounter,
131+
StringRef PrefixContextId,
132+
ProfiledBinary *Binary);
133+
// Fill in boundary samples for a call probe
134+
void populateBoundarySamplesWithProbes(const BranchSample &BranchCounter,
135+
StringRef PrefixContextId,
136+
ProfiledBinary *Binary);
137+
// Helper function to get FunctionSamples for the leaf inlined context
138+
FunctionSamples &getFunctionProfileForLeafProbe(
139+
StringRef PrefixContextId,
140+
SmallVector<std::string, 16> &LeafInlinedContext,
141+
const PseudoProbeFuncDesc *LeafFuncDesc);
142+
// Helper function to get FunctionSamples for the leaf probe
143+
FunctionSamples &getFunctionProfileForLeafProbe(StringRef PrefixContextId,
144+
const PseudoProbe *LeafProbe,
145+
ProfiledBinary *Binary);
119146
};
120147

121148
} // end namespace sampleprof

llvm/tools/llvm-profgen/ProfiledBinary.h

Lines changed: 10 additions & 1 deletion
Original file line number | Diff line number | Diff line change
@@ -244,10 +244,19 @@ class ProfiledBinary {
244244
void
245245
getInlineContextForProbe(const PseudoProbe *Probe,
246246
SmallVector<std::string, 16> &InlineContextStack,
247-
bool IncludeLeaf) const {
247+
bool IncludeLeaf = false) const {
248248
return ProbeDecoder.getInlineContextForProbe(Probe, InlineContextStack,
249249
IncludeLeaf);
250250
}
251+
const AddressProbesMap &getAddress2ProbesMap() const {
252+
return ProbeDecoder.getAddress2ProbesMap();
253+
}
254+
const PseudoProbeFuncDesc *getFuncDescForGUID(uint64_t GUID) {
255+
return ProbeDecoder.getFuncDescForGUID(GUID);
256+
}
257+
const PseudoProbeFuncDesc *getInlinerDescForProbe(const PseudoProbe *Probe) {
258+
return ProbeDecoder.getInlinerDescForProbe(Probe);
259+
}
251260
};
252261

253262
} // end namespace sampleprof

0 commit comments

Comments
 (0)