Skip to content

Commit f07915c

Browse files
committed
[vm] Intern callstacks when writing Perfetto profile
This significantly reduces the space occupied by callstacks e.g. 5 second recording of fibonacci computation goes from 1.5Mb to 600Kb. Also fixes flakiness of pkg/vm_service/test/get_perfetto_cpu_samples_rpc_test. Fixes #54401 TEST=ci Change-Id: I6202ce7e87ceb64f4330b7052e72e519a848e571 Reviewed-on: https://dart-review.googlesource.com/c/sdk/+/417521 Reviewed-by: Derek Xu <[email protected]>
1 parent f2614d2 commit f07915c

File tree

3 files changed

+91
-56
lines changed

3 files changed

+91
-56
lines changed

pkg/vm_service/test/get_perfetto_cpu_samples_rpc_test.dart

Lines changed: 48 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -63,6 +63,19 @@ Iterable<PerfSample> extractPerfSamplesFromTracePackets(
6363
final tests = <IsolateTest>[
6464
hasStoppedAtExit,
6565
(VmService service, IsolateRef isolateRef) async {
66+
// This test is calling |getPerfettoCpuSamples| twice: with and without
67+
// filter and expects to get consistent results. However profiler will
68+
// continue running and recording samples while this is happening which
69+
// means the buffer can overflow and old samples will be lost.
70+
//
71+
// The best way to avoid this would be to pause the profiler, but
72+
// service API does not provide a way to do it: setting profiler flag to
73+
// false will stop profiler entirely and discard accumulated samples.
74+
//
75+
// Instead we resort to bumping profile_period to 1h to stabilize this
76+
// test.
77+
await service.setFlag('profile_period', '60000000');
78+
6679
final result = await service.getPerfettoCpuSamples(isolateRef.id!);
6780
expect(result.type, 'PerfettoCpuSamples');
6881
expect(result.samplePeriod, isPositive);
@@ -77,18 +90,52 @@ final tests = <IsolateTest>[
7790
int frameCount = 0;
7891
int callstackCount = 0;
7992
int perfSampleCount = 0;
93+
final seenFrames = <int>{};
94+
final seenCallstacks = <int>{};
8095
for (final packet in packets) {
96+
if (packet.sequenceFlags &
97+
TracePacket_SequenceFlags.SEQ_INCREMENTAL_STATE_CLEARED.value !=
98+
0) {
99+
seenCallstacks.clear();
100+
}
101+
81102
if (packet.hasInternedData()) {
82103
frameCount += packet.internedData.frames.length;
83104
callstackCount += packet.internedData.callstacks.length;
105+
106+
for (var frame in packet.internedData.frames) {
107+
expect(frame.iid.toInt(), isNot(0));
108+
seenFrames.add(frame.iid.toInt());
109+
}
110+
for (var callstack in packet.internedData.callstacks) {
111+
for (var frame in callstack.frameIds) {
112+
expect(frame.toInt(), isIn(seenFrames));
113+
}
114+
115+
expect(callstack.iid.toInt(), isNot(0));
116+
seenCallstacks.add(callstack.iid.toInt());
117+
}
84118
}
85119
if (packet.hasPerfSample()) {
86120
perfSampleCount += 1;
121+
122+
// Check that packet correctly indicates dependency on incremental
123+
// state and that incremental state contains interned callstack
124+
// for this sample.
125+
expect(
126+
packet.sequenceFlags &
127+
TracePacket_SequenceFlags.SEQ_NEEDS_INCREMENTAL_STATE.value !=
128+
0,
129+
isTrue,
130+
);
131+
expect(packet.perfSample.callstackIid.toInt(), isIn(seenCallstacks));
87132
}
88133
}
89134
expect(frameCount, isPositive);
90135
expect(callstackCount, isPositive);
91-
expect(perfSampleCount, callstackCount);
136+
// Note: we will have more perfSampleCount than we have callstacks because
137+
// callstacks are interned.
138+
expect(perfSampleCount, greaterThanOrEqualTo(callstackCount));
92139

93140
// Calculate the time window of events.
94141
final timeOriginNanos = computeTimeOriginNanos(packets);

runtime/vm/profiler_service.cc

Lines changed: 43 additions & 48 deletions
Original file line numberDiff line numberDiff line change
@@ -1665,13 +1665,14 @@ void Profile::ProcessSampleFrameJSON(JSONArray* stack,
16651665
}
16661666

16671667
#if defined(SUPPORT_PERFETTO) && !defined(PRODUCT)
1668-
void Profile::ProcessSampleFramePerfetto(
1669-
perfetto::protos::pbzero::Callstack* callstack,
1670-
ProfileCodeInlinedFunctionsCache* cache,
1671-
ProcessedSample* sample,
1672-
intptr_t frame_index) {
1668+
namespace {
1669+
void ProcessSampleFramePerfetto(Profile* profile,
1670+
GrowableArray<uint64_t>& callstack,
1671+
ProfileCodeInlinedFunctionsCache* cache,
1672+
ProcessedSample* sample,
1673+
intptr_t frame_index) {
16731674
const uword pc = sample->At(frame_index);
1674-
ProfileCode* profile_code = GetCodeFromPC(pc, sample->timestamp());
1675+
ProfileCode* profile_code = profile->GetCodeFromPC(pc, sample->timestamp());
16751676
ASSERT(profile_code != nullptr);
16761677
ProfileFunction* function = profile_code->function();
16771678
ASSERT(function != nullptr);
@@ -1691,51 +1692,28 @@ void Profile::ProcessSampleFramePerfetto(
16911692
code ^= profile_code->code().ptr();
16921693
cache->Get(pc, code, sample, frame_index, &inlined_functions,
16931694
&inlined_token_positions, &token_position);
1694-
if (FLAG_trace_profiler_verbose && (inlined_functions != NULL)) {
1695-
for (intptr_t i = 0; i < inlined_functions->length(); i++) {
1696-
const String& name =
1697-
String::Handle((*inlined_functions)[i]->QualifiedScrubbedName());
1698-
THR_Print("InlinedFunction[%" Pd "] = {%s, %s}\n", i, name.ToCString(),
1699-
(*inlined_token_positions)[i].ToCString());
1700-
}
1701-
}
17021695
}
17031696

17041697
if (code.IsNull() || (inlined_functions == nullptr) ||
17051698
(inlined_functions->length() <= 1)) {
17061699
// This is the ID of a |Frame| that was added to the interned data table in
17071700
// |ProfilerService::PrintProfilePerfetto|. See the comments in that method
17081701
// for more details.
1709-
callstack->add_frame_ids(function->table_index() + 1);
1702+
callstack.Add(function->table_index() + 1);
17101703
return;
17111704
}
17121705

1713-
if (!code.is_optimized()) {
1714-
OS::PrintErr("Code that should be optimized is not. Please file a bug\n");
1715-
OS::PrintErr("Code object: %s\n", code.ToCString());
1716-
OS::PrintErr("Inlined functions length: %" Pd "\n",
1717-
inlined_functions->length());
1718-
for (intptr_t i = 0; i < inlined_functions->length(); i++) {
1719-
OS::PrintErr("IF[%" Pd "] = %s\n", i,
1720-
(*inlined_functions)[i]->ToFullyQualifiedCString());
1721-
}
1722-
}
1723-
1724-
ASSERT(code.is_optimized());
1725-
17261706
for (intptr_t i = 0; i < inlined_functions->length(); ++i) {
17271707
const Function* inlined_function = (*inlined_functions)[i];
17281708
ASSERT(inlined_function != NULL);
17291709
ASSERT(!inlined_function->IsNull());
17301710
ProfileFunction* profile_function =
1731-
functions_->LookupOrAdd(*inlined_function);
1711+
profile->FindFunction(*inlined_function);
17321712
ASSERT(profile_function != NULL);
1733-
// This is the ID of a |Frame| that was added to the interned data table in
1734-
// |ProfilerService::PrintProfilePerfetto|. See the comments in that method
1735-
// for more details.
1736-
callstack->add_frame_ids(profile_function->table_index() + 1);
1713+
callstack.Add(profile_function->table_index() + 1);
17371714
}
17381715
}
1716+
} // namespace
17391717
#endif // defined(SUPPORT_PERFETTO) && !defined(PRODUCT)
17401718

17411719
void Profile::ProcessInlinedFunctionFrameJSON(
@@ -1824,13 +1802,32 @@ void Profile::PrintSamplesPerfetto(
18241802
ASSERT(packet_ptr != nullptr);
18251803
auto& packet = *packet_ptr;
18261804

1805+
perfetto_utils::BytesInterner<uint64_t, Zone> callstack_interner(zone_);
1806+
GrowableArray<uint64_t> callstack(128);
1807+
18271808
// Note that |cache| is zone-allocated, so it does not need to be deallocated
18281809
// manually.
18291810
auto* cache = new ProfileCodeInlinedFunctionsCache();
18301811
for (intptr_t sample_index = 0; sample_index < samples_->length();
18311812
++sample_index) {
18321813
ProcessedSample* sample = samples_->At(sample_index);
18331814

1815+
// Walk the sampled PCs and intern the stack.
1816+
callstack.Clear();
1817+
for (intptr_t frame_index = sample->length() - 1; frame_index >= 0;
1818+
--frame_index) {
1819+
ASSERT(sample->At(frame_index) != 0);
1820+
ProcessSampleFramePerfetto(this, callstack, cache, sample, frame_index);
1821+
}
1822+
1823+
// Empty sample (everything is invisible).
1824+
if (callstack.is_empty()) {
1825+
continue;
1826+
}
1827+
1828+
const auto callstack_iid =
1829+
callstack_interner.Intern(&callstack[0], callstack.length());
1830+
18341831
perfetto_utils::SetTrustedPacketSequenceId(packet.get());
18351832
// We set this flag to indicate that this packet reads from the interned
18361833
// data table.
@@ -1840,27 +1837,25 @@ void Profile::PrintSamplesPerfetto(
18401837
perfetto_utils::SetTimestampAndMonotonicClockId(packet.get(),
18411838
sample->timestamp());
18421839

1843-
const intptr_t callstack_iid = sample_index + 1;
1844-
// Add a |Callstack| to the interned data table that represents the stack
1845-
// trace stored in |sample|.
1846-
perfetto::protos::pbzero::Callstack* callstack =
1847-
packet->set_interned_data()->add_callstacks();
1848-
callstack->set_iid(callstack_iid);
1849-
// Walk the sampled PCs.
1850-
for (intptr_t frame_index = sample->length() - 1; frame_index >= 0;
1851-
--frame_index) {
1852-
ASSERT(sample->At(frame_index) != 0);
1853-
ProcessSampleFramePerfetto(callstack, cache, sample, frame_index);
1854-
}
1855-
18561840
// Populate |packet| with a |PerfSample| that is linked to the |Callstack|
18571841
// that we populated above.
1858-
perfetto::protos::pbzero::PerfSample& perf_sample =
1859-
*packet->set_perf_sample();
1842+
auto& perf_sample = *packet->set_perf_sample();
18601843
perf_sample.set_pid(OS::ProcessId());
18611844
perf_sample.set_tid(OSThread::ThreadIdToIntPtr(sample->tid()));
18621845
perf_sample.set_callstack_iid(callstack_iid);
18631846

1847+
if (callstack_interner.HasNewlyInternedEntries()) {
1848+
auto& interned_data = *packet->set_interned_data();
1849+
callstack_interner.FlushNewlyInternedTo(
1850+
[&interned_data](const auto& interned) {
1851+
auto& callstack = *interned_data.add_callstacks();
1852+
callstack.set_iid(interned.iid);
1853+
for (intptr_t i = 0; i < interned.length; i++) {
1854+
callstack.add_frame_ids(interned.data[i]);
1855+
}
1856+
});
1857+
}
1858+
18641859
perfetto_utils::AppendPacketToJSONBase64String(jsonBase64String, &packet);
18651860
packet.Reset();
18661861
}

runtime/vm/profiler_service.h

Lines changed: 0 additions & 7 deletions
Original file line numberDiff line numberDiff line change
@@ -406,13 +406,6 @@ class Profile : public ValueObject {
406406
ProfileCodeInlinedFunctionsCache* cache,
407407
ProcessedSample* sample,
408408
intptr_t frame_index);
409-
#if defined(SUPPORT_PERFETTO) && !defined(PRODUCT)
410-
void ProcessSampleFramePerfetto(
411-
perfetto::protos::pbzero::Callstack* callstack,
412-
ProfileCodeInlinedFunctionsCache* cache,
413-
ProcessedSample* sample,
414-
intptr_t frame_index);
415-
#endif // defined(SUPPORT_PERFETTO) && !defined(PRODUCT)
416409
void ProcessInlinedFunctionFrameJSON(JSONArray* stack,
417410
const Function* inlined_function);
418411
void PrintFunctionFrameIndexJSON(JSONArray* stack, ProfileFunction* function);

0 commit comments

Comments
 (0)