Skip to content

Commit 3b884db

Browse files
authored
[InstrProf] Fix trace reservoir sampling (#152563)
`InstrProfWriter::addTemporalProfileTraces()` did not correctly account for when the sources traces are sampled, but the reservoir size is larger than what it was before, meaning there is room for more traces. Also, if the reservoir size decreased, meaning traces should be truncated. Depends on llvm/llvm-project#152550 for the test refactor
1 parent 688551f commit 3b884db

File tree

4 files changed

+28
-48
lines changed

4 files changed

+28
-48
lines changed

llvm/include/llvm/ProfileData/InstrProfWriter.h

Lines changed: 0 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -226,8 +226,6 @@ class InstrProfWriter {
226226
void addRecord(StringRef Name, uint64_t Hash, InstrProfRecord &&I,
227227
uint64_t Weight, function_ref<void(Error)> Warn);
228228
bool shouldEncodeData(const ProfilingData &PD);
229-
/// Add \p Trace using reservoir sampling.
230-
void addTemporalProfileTrace(TemporalProfTraceTy Trace);
231229

232230
/// Add a memprof record for a function identified by its \p Id.
233231
void addMemProfRecord(const GlobalValue::GUID Id,

llvm/lib/ProfileData/InstrProfWriter.cpp

Lines changed: 18 additions & 45 deletions
Original file line numberDiff line numberDiff line change
@@ -331,61 +331,34 @@ void InstrProfWriter::addDataAccessProfData(
331331
DataAccessProfileData = std::move(DataAccessProfDataIn);
332332
}
333333

334-
void InstrProfWriter::addTemporalProfileTrace(TemporalProfTraceTy Trace) {
335-
assert(Trace.FunctionNameRefs.size() <= MaxTemporalProfTraceLength);
336-
assert(!Trace.FunctionNameRefs.empty());
337-
if (TemporalProfTraceStreamSize < TemporalProfTraceReservoirSize) {
338-
// Simply append the trace if we have not yet hit our reservoir size limit.
339-
TemporalProfTraces.push_back(std::move(Trace));
340-
} else {
341-
// Otherwise, replace a random trace in the stream.
342-
std::uniform_int_distribution<uint64_t> Distribution(
343-
0, TemporalProfTraceStreamSize);
344-
uint64_t RandomIndex = Distribution(RNG);
345-
if (RandomIndex < TemporalProfTraces.size())
346-
TemporalProfTraces[RandomIndex] = std::move(Trace);
347-
}
348-
++TemporalProfTraceStreamSize;
349-
}
350-
351334
void InstrProfWriter::addTemporalProfileTraces(
352335
SmallVectorImpl<TemporalProfTraceTy> &SrcTraces, uint64_t SrcStreamSize) {
336+
if (TemporalProfTraces.size() > TemporalProfTraceReservoirSize)
337+
TemporalProfTraces.truncate(TemporalProfTraceReservoirSize);
353338
for (auto &Trace : SrcTraces)
354339
if (Trace.FunctionNameRefs.size() > MaxTemporalProfTraceLength)
355340
Trace.FunctionNameRefs.resize(MaxTemporalProfTraceLength);
356341
llvm::erase_if(SrcTraces, [](auto &T) { return T.FunctionNameRefs.empty(); });
357-
// Assume that the source has the same reservoir size as the destination to
358-
// avoid needing to record it in the indexed profile format.
359-
bool IsDestSampled =
360-
(TemporalProfTraceStreamSize > TemporalProfTraceReservoirSize);
361-
bool IsSrcSampled = (SrcStreamSize > TemporalProfTraceReservoirSize);
362-
if (!IsDestSampled && IsSrcSampled) {
363-
// If one of the traces are sampled, ensure that it belongs to Dest.
364-
std::swap(TemporalProfTraces, SrcTraces);
365-
std::swap(TemporalProfTraceStreamSize, SrcStreamSize);
366-
std::swap(IsDestSampled, IsSrcSampled);
367-
}
368-
if (!IsSrcSampled) {
369-
// If the source stream is not sampled, we add each source trace normally.
370-
for (auto &Trace : SrcTraces)
371-
addTemporalProfileTrace(std::move(Trace));
342+
// If there are no source traces, it is probably because
343+
// --temporal-profile-max-trace-length=0 was set to deliberately remove all
344+
// traces. In that case, we do not want to increase the stream size
345+
if (SrcTraces.empty())
372346
return;
373-
}
374-
// Otherwise, we find the traces that would have been removed if we added
375-
// the whole source stream.
376-
SmallSetVector<uint64_t, 8> IndicesToReplace;
377-
for (uint64_t I = 0; I < SrcStreamSize; I++) {
378-
std::uniform_int_distribution<uint64_t> Distribution(
379-
0, TemporalProfTraceStreamSize);
347+
// Add traces until our reservoir is full or we run out of source traces
348+
auto SrcTraceIt = SrcTraces.begin();
349+
while (TemporalProfTraces.size() < TemporalProfTraceReservoirSize &&
350+
SrcTraceIt < SrcTraces.end())
351+
TemporalProfTraces.push_back(*SrcTraceIt++);
352+
// Our reservoir is full, we need to sample the source stream
353+
llvm::shuffle(SrcTraceIt, SrcTraces.end(), RNG);
354+
for (uint64_t I = TemporalProfTraces.size();
355+
I < SrcStreamSize && SrcTraceIt < SrcTraces.end(); I++) {
356+
std::uniform_int_distribution<uint64_t> Distribution(0, I);
380357
uint64_t RandomIndex = Distribution(RNG);
381358
if (RandomIndex < TemporalProfTraces.size())
382-
IndicesToReplace.insert(RandomIndex);
383-
++TemporalProfTraceStreamSize;
359+
TemporalProfTraces[RandomIndex] = *SrcTraceIt++;
384360
}
385-
// Then we insert a random sample of the source traces.
386-
llvm::shuffle(SrcTraces.begin(), SrcTraces.end(), RNG);
387-
for (const auto &[Index, Trace] : llvm::zip(IndicesToReplace, SrcTraces))
388-
TemporalProfTraces[Index] = std::move(Trace);
361+
TemporalProfTraceStreamSize += SrcStreamSize;
389362
}
390363

391364
void InstrProfWriter::mergeRecordsFromWriter(InstrProfWriter &&IPW,

llvm/test/tools/llvm-profdata/merge-traces.proftext

Lines changed: 9 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -12,15 +12,24 @@
1212
# RUN: llvm-profdata merge --temporal-profile-trace-reservoir-size=2 %t-2.profdata %s %s --text | FileCheck %s --check-prefixes=CHECK,SEEN4,SAMPLE2
1313
# RUN: llvm-profdata merge --temporal-profile-trace-reservoir-size=2 %t-2.profdata %t-2.profdata --text | FileCheck %s --check-prefixes=CHECK,SEEN4,SAMPLE2
1414

15+
# Test that we can increase the reservoir size, even if inputs are sampled
16+
# RUN: llvm-profdata merge --temporal-profile-trace-reservoir-size=2 %s %s %s %s -o %t-4.profdata
17+
# RUN: llvm-profdata merge --temporal-profile-trace-reservoir-size=4 %t-4.profdata %t-4.profdata --text | FileCheck %s --check-prefixes=CHECK,SEEN8,SAMPLE4
18+
19+
# Test that decreasing the reservoir size truncates traces
20+
# RUN: llvm-profdata merge --temporal-profile-trace-reservoir-size=1 %t-4.profdata --text | FileCheck %s --check-prefixes=CHECK,SEEN4,SAMPLE1
21+
1522
# CHECK: :temporal_prof_traces
1623
# CHECK: # Num Temporal Profile Traces:
1724
# SAMPLE1: 1
1825
# SAMPLE2: 2
26+
# SAMPLE4: 4
1927
# CHECK: # Temporal Profile Trace Stream Size:
2028
# SEEN1: 1
2129
# SEEN2: 2
2230
# SEEN3: 3
2331
# SEEN4: 4
32+
# SEEN8: 8
2433
# CHECK: a,b,c,
2534

2635
# Header

llvm/test/tools/llvm-profdata/trace-limit.proftext

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -11,7 +11,7 @@
1111
# RUN: llvm-profdata merge --temporal-profile-max-trace-length=1000 %s -o %t.profdata
1212
# RUN: llvm-profdata show --temporal-profile-traces %t.profdata | FileCheck %s --check-prefixes=CHECK,ALL
1313

14-
# NONE: Temporal Profile Traces (samples=0
14+
# NONE: Temporal Profile Traces (samples=0 seen=0):
1515
# CHECK: Temporal Profile Traces (samples=1 seen=1):
1616
# SOME: Trace 0 (weight=1 count=2):
1717
# ALL: Trace 0 (weight=1 count=3):

0 commit comments

Comments
 (0)