@@ -28,11 +28,12 @@ void VirtualUnwinder::unwindCall(UnwindState &State) {
28
28
// 2nd frame is in prolog/epilog. In the future, we will switch to
29
29
// pro/epi tracker(Dwarf CFI) for the precise check.
30
30
uint64_t Source = State.getCurrentLBRSource ();
31
- auto Iter = State.CallStack .begin ();
32
- if (State.CallStack .size () == 1 || *(++Iter) != Source) {
33
- State.CallStack .front () = Source;
31
+ auto *ParentFrame = State.getParentFrame ();
32
+ if (ParentFrame == State.getDummyRootPtr () ||
33
+ ParentFrame->Address != Source) {
34
+ State.switchToFrame (Source);
34
35
} else {
35
- State.CallStack . pop_front ();
36
+ State.popFrame ();
36
37
}
37
38
State.InstPtr .update (Source);
38
39
}
@@ -41,116 +42,140 @@ void VirtualUnwinder::unwindLinear(UnwindState &State, uint64_t Repeat) {
41
42
InstructionPointer &IP = State.InstPtr ;
42
43
uint64_t Target = State.getCurrentLBRTarget ();
43
44
uint64_t End = IP.Address ;
44
- if (State.getBinary ()->usePseudoProbes ()) {
45
+ if (Binary->usePseudoProbes ()) {
46
+ // We don't need the top frame probe since it should be extracted
47
+ // from the range.
45
48
// The outcome of the virtual unwinding with pseudo probes is a
46
49
// map from a context key to the address range being unwound.
47
50
// This means basically linear unwinding is not needed for pseudo
48
51
// probes. The range will be simply recorded here and will be
49
52
// converted to a list of pseudo probes to report in ProfileGenerator.
50
- recordRangeCount (Target, End, State , Repeat);
53
+ State. getParentFrame ()-> recordRangeCount (Target, End, Repeat);
51
54
} else {
52
55
// Unwind linear execution part
56
+ uint64_t LeafAddr = State.CurrentLeafFrame ->Address ;
53
57
while (IP.Address >= Target) {
54
58
uint64_t PrevIP = IP.Address ;
55
59
IP.backward ();
56
60
// Break into segments for implicit call/return due to inlining
57
- bool SameInlinee =
58
- State.getBinary ()->inlineContextEqual (PrevIP, IP.Address );
61
+ bool SameInlinee = Binary->inlineContextEqual (PrevIP, IP.Address );
59
62
if (!SameInlinee || PrevIP == Target) {
60
- recordRangeCount (PrevIP, End, State, Repeat);
63
+ State.switchToFrame (LeafAddr);
64
+ State.CurrentLeafFrame ->recordRangeCount (PrevIP, End, Repeat);
61
65
End = IP.Address ;
62
66
}
63
- State. CallStack . front () = IP.Address ;
67
+ LeafAddr = IP.Address ;
64
68
}
65
69
}
66
70
}
67
71
68
72
// Unwinds through one return LBR entry.
// Diff view: lines starting with '-' are the previous CallStack-based code,
// lines starting with '+' are the replacement frame-based code.
// New behavior: the current frame is switched to the call address derived
// from the LBR target, a frame for the LBR source is pushed, and the
// instruction pointer is moved to the LBR source.
void VirtualUnwinder::unwindReturn (UnwindState &State) {
69
73
// Add extra frame as we unwind through the return
70
74
const LBREntry &LBR = State.getCurrentLBR ();
71
- uint64_t CallAddr = State. getBinary () ->getCallAddrFromFrameAddr (LBR.Target );
72
- State.CallStack . front () = CallAddr;
73
- State.CallStack . push_front (LBR.Source );
75
+ uint64_t CallAddr = Binary ->getCallAddrFromFrameAddr (LBR.Target );
76
+ State.switchToFrame ( CallAddr) ;
77
+ State.pushFrame (LBR.Source );
74
78
State.InstPtr .update (LBR.Source );
75
79
}
76
80
77
81
// Unwinds a branch that stays inside the current frame.
// The leaf frame is switched to the LBR source address (previously the front
// of State.CallStack was overwritten) and the instruction pointer is updated
// to that same address. No frame is pushed or popped here.
void VirtualUnwinder::unwindBranchWithinFrame (UnwindState &State) {
78
82
// TODO: Tolerate tail call for now, as we may see tail call from libraries.
79
83
// This is only for intra function branches, excluding tail calls.
80
84
uint64_t Source = State.getCurrentLBRSource ();
81
- State.CallStack . front () = Source;
85
+ State.switchToFrame ( Source) ;
82
86
State.InstPtr .update (Source);
83
87
}
84
88
85
// Diff view: the removed VirtualUnwinder::getOrCreateCounter is replaced by
// FrameStack::getContextKey.
// New behavior: builds a StringBasedCtxKey whose Context is the expanded
// context string of Stack, generates its hash code, and returns the key.
// The removed code additionally dispatched to the probe variant and looked
// up or created the SampleCounter in CtxCounterMap; the new function no
// longer touches the counter map.
- SampleCounter &
86
- VirtualUnwinder::getOrCreateCounter (const ProfiledBinary *Binary,
87
- std::list<uint64_t > &CallStack) {
88
- if (Binary->usePseudoProbes ()) {
89
- return getOrCreateCounterForProbe (Binary, CallStack);
90
- }
89
+ std::shared_ptr<StringBasedCtxKey> FrameStack::getContextKey () {
91
90
std::shared_ptr<StringBasedCtxKey> KeyStr =
92
91
std::make_shared<StringBasedCtxKey>();
93
- KeyStr->Context = Binary->getExpandedContextStr (CallStack );
92
+ KeyStr->Context = Binary->getExpandedContextStr (Stack );
94
93
KeyStr->genHashCode ();
95
- auto Ret =
96
- CtxCounterMap->emplace (Hashable<ContextKey>(KeyStr), SampleCounter ());
97
- return Ret.first ->second ;
94
+ return KeyStr;
98
95
}
99
96
100
// Diff view: the removed VirtualUnwinder::getOrCreateCounterForProbe is
// replaced by ProbeStack::getContextKey.
// New behavior: copies every entry of Stack into the Probes list of a fresh
// ProbeBasedCtxKey, compresses recursion in that list, generates the hash
// code, and returns the key.
// The removed code resolved call probes from raw addresses inside this
// function and stopped at the first address without a probe; the new loop
// iterates Stack directly and performs no lookup here.
- SampleCounter &
101
- VirtualUnwinder::getOrCreateCounterForProbe (const ProfiledBinary *Binary,
102
- std::list<uint64_t > &CallStack) {
97
+ std::shared_ptr<ProbeBasedCtxKey> ProbeStack::getContextKey () {
103
98
std::shared_ptr<ProbeBasedCtxKey> ProbeBasedKey =
104
99
std::make_shared<ProbeBasedCtxKey>();
105
- if (CallStack.size () > 1 ) {
106
- // We don't need the top frame probe since it should be extracted
107
- // from the range.
108
- // The top of stack is an instruction from the function where
109
- // the LBR address range physically resides. Strip it since
110
- // the function is not a part of the call context. We also
111
- // don't need its inline context since the probes being unwound
112
- // come with an inline context all the way back to the uninlined
113
- // function in their prefix tree.
114
- auto Iter = CallStack.rbegin ();
115
- auto EndT = std::prev (CallStack.rend ());
116
- for (; Iter != EndT; Iter++) {
117
- uint64_t Address = *Iter;
118
- const PseudoProbe *CallProbe = Binary->getCallProbeForAddr (Address);
119
- // We may not find a probe for a merged or external callsite.
120
- // Callsite merging may cause the loss of original probe IDs.
121
- // Cutting off the context from here since the inline will
122
- // not know how to consume a context with unknown callsites.
123
- if (!CallProbe)
124
- break ;
125
- ProbeBasedKey->Probes .emplace_back (CallProbe);
126
- }
100
+ for (auto CallProbe : Stack) {
101
+ ProbeBasedKey->Probes .emplace_back (CallProbe);
127
102
}
128
103
CSProfileGenerator::compressRecursionContext<const PseudoProbe *>(
129
104
ProbeBasedKey->Probes );
130
105
ProbeBasedKey->genHashCode ();
131
- Hashable<ContextKey> ContextId (ProbeBasedKey);
132
- auto Ret = CtxCounterMap->emplace (ContextId, SampleCounter ());
133
- return Ret.first ->second ;
106
+ return ProbeBasedKey;
107
+ }
108
+
109
// Flushes the samples stored on one trie frame into the context counter map.
// Cur   - frame whose RangeSamples and BranchSamples are read.
// Stack - context stack used only to obtain the context key.
// Returns without touching the map when the frame holds no samples at all.
// Otherwise the counter for the key from Stack.getContextKey() is looked up
// or created via emplace, and every stored sample is recorded on it after
// both of its addresses are converted with Binary->virtualAddrToOffset.
// NOTE(review): each sample item is read with std::get<0..2>; elements 0 and
// 1 are treated as addresses and element 2 is passed as the count argument.
// The tuple layout itself is declared outside this view.
+ template <typename T>
110
+ void VirtualUnwinder::collectSamplesFromFrame (UnwindState::ProfiledFrame *Cur,
111
+ T &Stack) {
112
+ if (Cur->RangeSamples .empty () && Cur->BranchSamples .empty ())
113
+ return ;
114
+
115
+ std::shared_ptr<ContextKey> Key = Stack.getContextKey ();
116
+ auto Ret = CtxCounterMap->emplace (Hashable<ContextKey>(Key), SampleCounter ());
117
+ SampleCounter &SCounter = Ret.first ->second ;
118
+ for (auto &Item : Cur->RangeSamples ) {
119
+ uint64_t StartOffset = Binary->virtualAddrToOffset (std::get<0 >(Item));
120
+ uint64_t EndOffset = Binary->virtualAddrToOffset (std::get<1 >(Item));
121
+ SCounter.recordRangeCount (StartOffset, EndOffset, std::get<2 >(Item));
122
+ }
123
+
124
+ for (auto &Item : Cur->BranchSamples ) {
125
+ uint64_t SourceOffset = Binary->virtualAddrToOffset (std::get<0 >(Item));
126
+ uint64_t TargetOffset = Binary->virtualAddrToOffset (std::get<1 >(Item));
127
+ SCounter.recordBranchCount (SourceOffset, TargetOffset, std::get<2 >(Item));
128
+ }
129
+ }
130
+
131
// Recursive walk over the frame trie rooted at Cur.
// Cur   - trie node being visited.
// Stack - context stack shared along the current root-to-node path.
// For every node other than the dummy root the node is pushed onto Stack
// first. If pushFrame reports failure, each child is handed to the
// one-argument overload instead and this call returns; the samples stored on
// Cur itself are not collected on that path.
// Otherwise the samples of Cur are collected, every child is visited with
// the same Stack, and the frame is popped again before returning.
// NOTE(review): popFrame is also reached when Cur is the dummy root, for
// which no pushFrame was done. Confirm that popFrame tolerates that case.
+ template <typename T>
132
+ void VirtualUnwinder::collectSamplesFromFrameTrie (
133
+ UnwindState::ProfiledFrame *Cur, T &Stack) {
134
+ if (!Cur->isDummyRoot ()) {
135
+ if (!Stack.pushFrame (Cur)) {
136
+ // Process truncated context
137
+ for (const auto &Item : Cur->Children ) {
138
+ // Start a new traversal ignoring its bottom context
139
+ collectSamplesFromFrameTrie (Item.second .get ());
140
+ }
141
+ return ;
142
+ }
143
+ }
144
+
145
+ collectSamplesFromFrame (Cur, Stack);
146
+ // Process children frame
147
+ for (const auto &Item : Cur->Children ) {
148
+ collectSamplesFromFrameTrie (Item.second .get (), Stack);
149
+ }
150
+ // Recover the call stack
151
+ Stack.popFrame ();
134
152
}
135
153
136
// Diff view: the removed VirtualUnwinder::recordRangeCount is replaced by the
// one-argument collectSamplesFromFrameTrie entry point.
// New behavior: creates a fresh, function-local context stack (ProbeStack
// when the binary uses pseudo probes, FrameStack otherwise) and starts the
// templated trie traversal from Cur with it.
// The removed code converted one range to offsets and recorded it directly
// on the counter selected by State.CallStack.
- void VirtualUnwinder::recordRangeCount (uint64_t Start, uint64_t End,
137
- UnwindState &State, uint64_t Repeat) {
138
- uint64_t StartOffset = State.getBinary ()->virtualAddrToOffset (Start);
139
- uint64_t EndOffset = State.getBinary ()->virtualAddrToOffset (End);
140
- SampleCounter &SCounter =
141
- getOrCreateCounter (State.getBinary (), State.CallStack );
142
- SCounter.recordRangeCount (StartOffset, EndOffset, Repeat);
154
+ void VirtualUnwinder::collectSamplesFromFrameTrie (
155
+ UnwindState::ProfiledFrame *Cur) {
156
+ if (Binary->usePseudoProbes ()) {
157
+ ProbeStack Stack (Binary);
158
+ collectSamplesFromFrameTrie<ProbeStack>(Cur, Stack);
159
+ } else {
160
+ FrameStack Stack (Binary);
161
+ collectSamplesFromFrameTrie<FrameStack>(Cur, Stack);
162
+ }
143
163
}
144
164
145
165
// Records one LBR branch sample against the current unwinding context.
// Branch - LBR entry to record; entries flagged IsArtificial are ignored.
// State  - unwind state that supplies the frame receiving the sample.
// Repeat - count passed through to the frame's recordBranchCount.
// New behavior: the raw source and target addresses are stored on a trie
// frame (the parent frame with pseudo probes, the current leaf frame
// otherwise). The removed code converted both addresses to offsets here and
// recorded them directly on a SampleCounter.
void VirtualUnwinder::recordBranchCount (const LBREntry &Branch,
146
166
UnwindState &State, uint64_t Repeat) {
147
167
if (Branch.IsArtificial )
148
168
return ;
149
- uint64_t SourceOffset = State.getBinary ()->virtualAddrToOffset (Branch.Source );
150
- uint64_t TargetOffset = State.getBinary ()->virtualAddrToOffset (Branch.Target );
151
- SampleCounter &SCounter =
152
- getOrCreateCounter (State.getBinary (), State.CallStack );
153
- SCounter.recordBranchCount (SourceOffset, TargetOffset, Repeat);
169
+
170
+ if (Binary->usePseudoProbes ()) {
171
+ // Same as recordRangeCount, we don't need the top frame probe since we will
172
+ // extract it from branch's source address
173
+ State.getParentFrame ()->recordBranchCount (Branch.Source , Branch.Target ,
174
+ Repeat);
175
+ } else {
176
+ State.CurrentLeafFrame ->recordBranchCount (Branch.Source , Branch.Target ,
177
+ Repeat);
178
+ }
154
179
}
155
180
156
181
bool VirtualUnwinder::unwind (const HybridSample *Sample, uint64_t Repeat) {
@@ -199,6 +224,8 @@ bool VirtualUnwinder::unwind(const HybridSample *Sample, uint64_t Repeat) {
199
224
// Record `branch` with calling context after unwinding.
200
225
recordBranchCount (Branch, State, Repeat);
201
226
}
227
+ // As samples are aggregated on trie, record them into counter map
228
+ collectSamplesFromFrameTrie (State.getDummyRootPtr ());
202
229
203
230
return true ;
204
231
}
@@ -325,7 +352,8 @@ void PerfReader::printUnwinderOutput() {
325
352
void PerfReader::unwindSamples () {
326
353
for (const auto &Item : AggregatedSamples) {
327
354
const HybridSample *Sample = dyn_cast<HybridSample>(Item.first .getPtr ());
328
- VirtualUnwinder Unwinder (&BinarySampleCounters[Sample->Binary ]);
355
+ VirtualUnwinder Unwinder (&BinarySampleCounters[Sample->Binary ],
356
+ Sample->Binary );
329
357
Unwinder.unwind (Sample, Item.second );
330
358
}
331
359
@@ -334,7 +362,7 @@ void PerfReader::unwindSamples() {
334
362
}
335
363
336
364
bool PerfReader::extractLBRStack (TraceStream &TraceIt,
337
- SmallVector <LBREntry, 16 > &LBRStack,
365
+ SmallVectorImpl <LBREntry> &LBRStack,
338
366
ProfiledBinary *Binary) {
339
367
// The raw format of LBR stack is like:
340
368
// 0x4005c8/0x4005dc/P/-/-/0 0x40062f/0x4005b0/P/-/-/0 ...
@@ -398,7 +426,7 @@ bool PerfReader::extractLBRStack(TraceStream &TraceIt,
398
426
}
399
427
400
428
bool PerfReader::extractCallstack (TraceStream &TraceIt,
401
- std::list <uint64_t > &CallStack) {
429
+ SmallVectorImpl <uint64_t > &CallStack) {
402
430
// The raw format of call stack is like:
403
431
// 4005dc # leaf frame
404
432
// 400634
0 commit comments