Skip to content

[InstrProf] Fix trace reservoir sampling #152563

New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Merged
merged 1 commit into from
Aug 8, 2025
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
2 changes: 0 additions & 2 deletions llvm/include/llvm/ProfileData/InstrProfWriter.h
Original file line number Diff line number Diff line change
Expand Up @@ -226,8 +226,6 @@ class InstrProfWriter {
void addRecord(StringRef Name, uint64_t Hash, InstrProfRecord &&I,
uint64_t Weight, function_ref<void(Error)> Warn);
bool shouldEncodeData(const ProfilingData &PD);
/// Add \p Trace using reservoir sampling.
void addTemporalProfileTrace(TemporalProfTraceTy Trace);

/// Add a memprof record for a function identified by its \p Id.
void addMemProfRecord(const GlobalValue::GUID Id,
Expand Down
63 changes: 18 additions & 45 deletions llvm/lib/ProfileData/InstrProfWriter.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -331,61 +331,34 @@ void InstrProfWriter::addDataAccessProfData(
DataAccessProfileData = std::move(DataAccessProfDataIn);
}

void InstrProfWriter::addTemporalProfileTrace(TemporalProfTraceTy Trace) {
assert(Trace.FunctionNameRefs.size() <= MaxTemporalProfTraceLength);
assert(!Trace.FunctionNameRefs.empty());
if (TemporalProfTraceStreamSize < TemporalProfTraceReservoirSize) {
// Simply append the trace if we have not yet hit our reservoir size limit.
TemporalProfTraces.push_back(std::move(Trace));
} else {
// Otherwise, replace a random trace in the stream.
std::uniform_int_distribution<uint64_t> Distribution(
0, TemporalProfTraceStreamSize);
uint64_t RandomIndex = Distribution(RNG);
if (RandomIndex < TemporalProfTraces.size())
TemporalProfTraces[RandomIndex] = std::move(Trace);
}
++TemporalProfTraceStreamSize;
}

void InstrProfWriter::addTemporalProfileTraces(
SmallVectorImpl<TemporalProfTraceTy> &SrcTraces, uint64_t SrcStreamSize) {
if (TemporalProfTraces.size() > TemporalProfTraceReservoirSize)
TemporalProfTraces.truncate(TemporalProfTraceReservoirSize);
for (auto &Trace : SrcTraces)
if (Trace.FunctionNameRefs.size() > MaxTemporalProfTraceLength)
Trace.FunctionNameRefs.resize(MaxTemporalProfTraceLength);
llvm::erase_if(SrcTraces, [](auto &T) { return T.FunctionNameRefs.empty(); });
// Assume that the source has the same reservoir size as the destination to
// avoid needing to record it in the indexed profile format.
bool IsDestSampled =
(TemporalProfTraceStreamSize > TemporalProfTraceReservoirSize);
bool IsSrcSampled = (SrcStreamSize > TemporalProfTraceReservoirSize);
if (!IsDestSampled && IsSrcSampled) {
// If one of the traces are sampled, ensure that it belongs to Dest.
std::swap(TemporalProfTraces, SrcTraces);
std::swap(TemporalProfTraceStreamSize, SrcStreamSize);
std::swap(IsDestSampled, IsSrcSampled);
}
if (!IsSrcSampled) {
// If the source stream is not sampled, we add each source trace normally.
for (auto &Trace : SrcTraces)
addTemporalProfileTrace(std::move(Trace));
// If there are no source traces, it is probably because
// --temporal-profile-max-trace-length=0 was set to deliberately remove all
// traces. In that case, we do not want to increase the stream size
if (SrcTraces.empty())
return;
}
// Otherwise, we find the traces that would have been removed if we added
// the whole source stream.
SmallSetVector<uint64_t, 8> IndicesToReplace;
for (uint64_t I = 0; I < SrcStreamSize; I++) {
std::uniform_int_distribution<uint64_t> Distribution(
0, TemporalProfTraceStreamSize);
// Add traces until our reservoir is full or we run out of source traces
auto SrcTraceIt = SrcTraces.begin();
while (TemporalProfTraces.size() < TemporalProfTraceReservoirSize &&
SrcTraceIt < SrcTraces.end())
TemporalProfTraces.push_back(*SrcTraceIt++);
// Our reservoir is full, we need to sample the source stream
llvm::shuffle(SrcTraceIt, SrcTraces.end(), RNG);
for (uint64_t I = TemporalProfTraces.size();
I < SrcStreamSize && SrcTraceIt < SrcTraces.end(); I++) {
std::uniform_int_distribution<uint64_t> Distribution(0, I);
uint64_t RandomIndex = Distribution(RNG);
if (RandomIndex < TemporalProfTraces.size())
IndicesToReplace.insert(RandomIndex);
++TemporalProfTraceStreamSize;
TemporalProfTraces[RandomIndex] = *SrcTraceIt++;
}
// Then we insert a random sample of the source traces.
llvm::shuffle(SrcTraces.begin(), SrcTraces.end(), RNG);
for (const auto &[Index, Trace] : llvm::zip(IndicesToReplace, SrcTraces))
TemporalProfTraces[Index] = std::move(Trace);
TemporalProfTraceStreamSize += SrcStreamSize;
}

void InstrProfWriter::mergeRecordsFromWriter(InstrProfWriter &&IPW,
Expand Down
9 changes: 9 additions & 0 deletions llvm/test/tools/llvm-profdata/merge-traces.proftext
Original file line number Diff line number Diff line change
Expand Up @@ -12,15 +12,24 @@
# RUN: llvm-profdata merge --temporal-profile-trace-reservoir-size=2 %t-2.profdata %s %s --text | FileCheck %s --check-prefixes=CHECK,SEEN4,SAMPLE2
# RUN: llvm-profdata merge --temporal-profile-trace-reservoir-size=2 %t-2.profdata %t-2.profdata --text | FileCheck %s --check-prefixes=CHECK,SEEN4,SAMPLE2

# Test that we can increase the reservoir size, even if inputs are sampled
# RUN: llvm-profdata merge --temporal-profile-trace-reservoir-size=2 %s %s %s %s -o %t-4.profdata
# RUN: llvm-profdata merge --temporal-profile-trace-reservoir-size=4 %t-4.profdata %t-4.profdata --text | FileCheck %s --check-prefixes=CHECK,SEEN8,SAMPLE4

# Test that decreasing the reservoir size truncates traces
# RUN: llvm-profdata merge --temporal-profile-trace-reservoir-size=1 %t-4.profdata --text | FileCheck %s --check-prefixes=CHECK,SEEN4,SAMPLE1

# CHECK: :temporal_prof_traces
# CHECK: # Num Temporal Profile Traces:
# SAMPLE1: 1
# SAMPLE2: 2
# SAMPLE4: 4
# CHECK: # Temporal Profile Trace Stream Size:
# SEEN1: 1
# SEEN2: 2
# SEEN3: 3
# SEEN4: 4
# SEEN8: 8
# CHECK: a,b,c,

# Header
Expand Down
2 changes: 1 addition & 1 deletion llvm/test/tools/llvm-profdata/trace-limit.proftext
Original file line number Diff line number Diff line change
Expand Up @@ -11,7 +11,7 @@
# RUN: llvm-profdata merge --temporal-profile-max-trace-length=1000 %s -o %t.profdata
# RUN: llvm-profdata show --temporal-profile-traces %t.profdata | FileCheck %s --check-prefixes=CHECK,ALL

# NONE: Temporal Profile Traces (samples=0
# NONE: Temporal Profile Traces (samples=0 seen=0):
# CHECK: Temporal Profile Traces (samples=1 seen=1):
# SOME: Trace 0 (weight=1 count=2):
# ALL: Trace 0 (weight=1 count=3):
Expand Down