From f34ab2d0d5f767d46f31452a8231e56f67ed4a21 Mon Sep 17 00:00:00 2001 From: h2h Date: Thu, 9 Oct 2025 12:07:13 -0700 Subject: [PATCH 01/11] [llvm-profgen] Loading binary functions from .symtab when DWARF info is incomplete --- llvm/include/llvm/ProfileData/SampleProf.h | 10 ++++- llvm/tools/llvm-profgen/ProfileGenerator.cpp | 27 ++++++++++++ llvm/tools/llvm-profgen/ProfiledBinary.cpp | 44 ++++++++++++++++++++ llvm/tools/llvm-profgen/ProfiledBinary.h | 3 ++ 4 files changed, 82 insertions(+), 2 deletions(-) diff --git a/llvm/include/llvm/ProfileData/SampleProf.h b/llvm/include/llvm/ProfileData/SampleProf.h index 3dd34aba2d716..4adbe13b6712b 100644 --- a/llvm/include/llvm/ProfileData/SampleProf.h +++ b/llvm/include/llvm/ProfileData/SampleProf.h @@ -1214,12 +1214,18 @@ class FunctionSamples { // Note the sequence of the suffixes in the knownSuffixes array matters. // If suffix "A" is appended after the suffix "B", "A" should be in front // of "B" in knownSuffixes. - const char *KnownSuffixes[] = {LLVMSuffix, PartSuffix, UniqSuffix}; + SmallVector KnownSuffixes ({LLVMSuffix, PartSuffix, UniqSuffix}); + return getCanonicalFnName(FnName, KnownSuffixes, Attr); + } + + static StringRef getCanonicalFnName(StringRef FnName, + const SmallVector &Suffixes, + StringRef Attr = "selected") { if (Attr == "" || Attr == "all") return FnName.split('.').first; if (Attr == "selected") { StringRef Cand(FnName); - for (const auto &Suf : KnownSuffixes) { + for (const auto &Suf : Suffixes) { StringRef Suffix(Suf); // If the profile contains ".__uniq." suffix, don't strip the // suffix for names in the IR. diff --git a/llvm/tools/llvm-profgen/ProfileGenerator.cpp b/llvm/tools/llvm-profgen/ProfileGenerator.cpp index 3b875c5de3c09..058b154fc5a57 100644 --- a/llvm/tools/llvm-profgen/ProfileGenerator.cpp +++ b/llvm/tools/llvm-profgen/ProfileGenerator.cpp @@ -449,29 +449,56 @@ bool ProfileGeneratorBase::collectFunctionsFromRawProfile( // Go through all the stacks, ranges and branches in sample counters, use // the start of the range to look up the function it belongs and record the // function. + uint64_t ErrStkAddr = 0, ErrFuncRange = 0, ErrSrc = 0, ErrTgt = 0; + uint64_t TotalStkAddr = 0, TotalFuncRange = 0, TotalSrc = 0, TotalTgt = 0; for (const auto &CI : *SampleCounters) { if (const auto *CtxKey = dyn_cast(CI.first.getPtr())) { for (auto StackAddr : CtxKey->Context) { + uint64_t inc = Binary->addressIsCode(StackAddr) ? 1 : 0; + TotalStkAddr += inc; if (FuncRange *FRange = Binary->findFuncRange(StackAddr)) ProfiledFunctions.insert(FRange->Func); + else + ErrStkAddr += inc; } } for (auto Item : CI.second.RangeCounter) { uint64_t StartAddress = Item.first.first; + uint64_t inc = Binary->addressIsCode(StartAddress) ? 1 : 0; + TotalFuncRange += inc; if (FuncRange *FRange = Binary->findFuncRange(StartAddress)) ProfiledFunctions.insert(FRange->Func); + else + ErrFuncRange += inc; } for (auto Item : CI.second.BranchCounter) { uint64_t SourceAddress = Item.first.first; uint64_t TargetAddress = Item.first.second; + uint64_t srcinc = Binary->addressIsCode(SourceAddress) ? 1 : 0; + uint64_t tgtinc = Binary->addressIsCode(TargetAddress) ? 1 : 0; + TotalSrc += srcinc; if (FuncRange *FRange = Binary->findFuncRange(SourceAddress)) ProfiledFunctions.insert(FRange->Func); + else + ErrSrc += srcinc; + TotalTgt += tgtinc; if (FuncRange *FRange = Binary->findFuncRange(TargetAddress)) ProfiledFunctions.insert(FRange->Func); + else + ErrTgt += tgtinc; } } + + if (ErrStkAddr) + WithColor::warning() << "Cannot find Stack Address from DWARF Info: " << ErrStkAddr << "/" << TotalStkAddr << " missing\n"; + if (ErrFuncRange) + WithColor::warning() << "Cannot find Function Range from DWARF Info: " << ErrFuncRange << "/" << TotalFuncRange << " missing\n"; + if (ErrSrc) + WithColor::warning() << "Cannot find LBR Source Addr from DWARF Info: " << ErrSrc << "/" << TotalSrc << " missing\n"; + if (ErrTgt) + WithColor::warning() << "Cannot find LBR Target Addr from DWARF Info: " << ErrTgt << "/" << TotalTgt << " missing\n"; return true; } diff --git a/llvm/tools/llvm-profgen/ProfiledBinary.cpp b/llvm/tools/llvm-profgen/ProfiledBinary.cpp index 94728ce4abffe..2d9a13b97114c 100644 --- a/llvm/tools/llvm-profgen/ProfiledBinary.cpp +++ b/llvm/tools/llvm-profgen/ProfiledBinary.cpp @@ -257,6 +257,8 @@ void ProfiledBinary::load() { if (ShowDisassemblyOnly) decodePseudoProbe(Obj); + populateSymbolsFromElf(Obj); + // Disassemble the text sections. disassemble(Obj); @@ -820,6 +822,48 @@ void ProfiledBinary::populateSymbolAddressList(const ObjectFile *Obj) { } } +void ProfiledBinary::populateSymbolsFromElf( + const ObjectFile *Obj) { + // Load binary functions from ELF symbol table when DWARF info is incomplete + StringRef FileName = Obj->getFileName(); + for (const ELFSymbolRef Symbol : Obj->symbols()) { + const SymbolRef::Type Type = unwrapOrError(Symbol.getType(), FileName); + const uint64_t Addr = unwrapOrError(Symbol.getAddress(), FileName); + const StringRef Name = unwrapOrError(Symbol.getName(), FileName); + const uint64_t Size = Symbol.getSize(); + + if (Size == 0 || Type != SymbolRef::ST_Function) + continue; + + SmallVector Suffixes( + {".destroy", ".resume", ".llvm.", ".cold", ".warm"}); + const StringRef SymName = FunctionSamples::getCanonicalFnName(Name, Suffixes); + + auto Ret = BinaryFunctions.emplace(SymName, BinaryFunction()); + auto &Func = Ret.first->second; + if (Ret.second) + Func.FuncName = Ret.first->first; + + if (auto Range = findFuncRange(Addr)) { + if (Ret.second && ShowDetailedWarning) + WithColor::warning() + << "Symbol " << Name << " start address " + << format("%8" PRIx64, Addr) << " already exists in DWARF at " + << format("%8" PRIx64, Range->StartAddress) << " in function " + << Range->getFuncName() << "\n"; + } else { + // Store/Update Function Range from SymTab + Func.Ranges.emplace_back(Addr, Addr + Size); + + auto R = StartAddrToFuncRangeMap.emplace(Addr, FuncRange()); + FuncRange &FRange = R.first->second; + FRange.Func = &Func; + FRange.StartAddress = Addr; + FRange.EndAddress = Addr + Size; + } + } +} + void ProfiledBinary::loadSymbolsFromDWARFUnit(DWARFUnit &CompilationUnit) { for (const auto &DieInfo : CompilationUnit.dies()) { llvm::DWARFDie Die(&CompilationUnit, &DieInfo); diff --git a/llvm/tools/llvm-profgen/ProfiledBinary.h b/llvm/tools/llvm-profgen/ProfiledBinary.h index 5a814b7dbd52d..238c27fbc4c9f 100644 --- a/llvm/tools/llvm-profgen/ProfiledBinary.h +++ b/llvm/tools/llvm-profgen/ProfiledBinary.h @@ -356,6 +356,9 @@ class ProfiledBinary { // Create symbol to its start address mapping. void populateSymbolAddressList(const object::ObjectFile *O); + // Load functions from its symbol table (when DWARF info is missing). + void populateSymbolsFromElf(const object::ObjectFile *O); + // A function may be spilt into multiple non-continuous address ranges. We use // this to set whether start a function range is the real entry of the // function and also set false to the non-function label. From 0fd352d28316691d97c245f80fe8205309c3b253 Mon Sep 17 00:00:00 2001 From: h2h Date: Wed, 15 Oct 2025 16:25:56 -0700 Subject: [PATCH 02/11] formatting --- llvm/include/llvm/ProfileData/SampleProf.h | 2 +- llvm/tools/llvm-profgen/ProfileGenerator.cpp | 13 +++++++++---- llvm/tools/llvm-profgen/ProfiledBinary.cpp | 8 ++++---- 3 files changed, 14 insertions(+), 9 deletions(-) diff --git a/llvm/include/llvm/ProfileData/SampleProf.h b/llvm/include/llvm/ProfileData/SampleProf.h index 4adbe13b6712b..dadf718d0b904 100644 --- a/llvm/include/llvm/ProfileData/SampleProf.h +++ b/llvm/include/llvm/ProfileData/SampleProf.h @@ -1214,7 +1214,7 @@ class FunctionSamples { // Note the sequence of the suffixes in the knownSuffixes array matters. // If suffix "A" is appended after the suffix "B", "A" should be in front // of "B" in knownSuffixes. - SmallVector KnownSuffixes ({LLVMSuffix, PartSuffix, UniqSuffix}); + SmallVector KnownSuffixes({LLVMSuffix, PartSuffix, UniqSuffix}); return getCanonicalFnName(FnName, KnownSuffixes, Attr); } diff --git a/llvm/tools/llvm-profgen/ProfileGenerator.cpp b/llvm/tools/llvm-profgen/ProfileGenerator.cpp index 058b154fc5a57..0478d5568085a 100644 --- a/llvm/tools/llvm-profgen/ProfileGenerator.cpp +++ b/llvm/tools/llvm-profgen/ProfileGenerator.cpp @@ -492,13 +492,18 @@ bool ProfileGeneratorBase::collectFunctionsFromRawProfile( } if (ErrStkAddr) - WithColor::warning() << "Cannot find Stack Address from DWARF Info: " << ErrStkAddr << "/" << TotalStkAddr << " missing\n"; + WithColor::warning() << "Cannot find Stack Address from DWARF Info: " + << ErrStkAddr << "/" << TotalStkAddr << " missing\n"; if (ErrFuncRange) - WithColor::warning() << "Cannot find Function Range from DWARF Info: " << ErrFuncRange << "/" << TotalFuncRange << " missing\n"; + WithColor::warning() << "Cannot find Function Range from DWARF Info: " + << ErrFuncRange << "/" << TotalFuncRange + << " missing\n"; if (ErrSrc) - WithColor::warning() << "Cannot find LBR Source Addr from DWARF Info: " << ErrSrc << "/" << TotalSrc << " missing\n"; + WithColor::warning() << "Cannot find LBR Source Addr from DWARF Info: " + << ErrSrc << "/" << TotalSrc << " missing\n"; if (ErrTgt) - WithColor::warning() << "Cannot find LBR Target Addr from DWARF Info: " << ErrTgt << "/" << TotalTgt << " missing\n"; + WithColor::warning() << "Cannot find LBR Target Addr from DWARF Info: " + << ErrTgt << "/" << TotalTgt << " missing\n"; return true; } diff --git a/llvm/tools/llvm-profgen/ProfiledBinary.cpp b/llvm/tools/llvm-profgen/ProfiledBinary.cpp index 2d9a13b97114c..aa385c0db50db 100644 --- a/llvm/tools/llvm-profgen/ProfiledBinary.cpp +++ b/llvm/tools/llvm-profgen/ProfiledBinary.cpp @@ -822,8 +822,7 @@ void ProfiledBinary::populateSymbolAddressList(const ObjectFile *Obj) { } } -void ProfiledBinary::populateSymbolsFromElf( - const ObjectFile *Obj) { +void ProfiledBinary::populateSymbolsFromElf(const ObjectFile *Obj) { // Load binary functions from ELF symbol table when DWARF info is incomplete StringRef FileName = Obj->getFileName(); for (const ELFSymbolRef Symbol : Obj->symbols()) { @@ -836,8 +835,9 @@ void ProfiledBinary::populateSymbolsFromElf( continue; SmallVector Suffixes( - {".destroy", ".resume", ".llvm.", ".cold", ".warm"}); - const StringRef SymName = FunctionSamples::getCanonicalFnName(Name, Suffixes); + {".destroy", ".resume", ".llvm.", ".cold", ".warm"}); + const StringRef SymName = + FunctionSamples::getCanonicalFnName(Name, Suffixes); auto Ret = BinaryFunctions.emplace(SymName, BinaryFunction()); auto &Func = Ret.first->second; From c097d374402f11fe00d997495b10a834ff6a4d9e Mon Sep 17 00:00:00 2001 From: h2h Date: Thu, 16 Oct 2025 10:59:47 -0700 Subject: [PATCH 03/11] Fix branch target check when an instruction branches to itself. (i.e. jmp 0) --- llvm/tools/llvm-profgen/ProfiledBinary.cpp | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/llvm/tools/llvm-profgen/ProfiledBinary.cpp b/llvm/tools/llvm-profgen/ProfiledBinary.cpp index aa385c0db50db..2ceeba28f77a8 100644 --- a/llvm/tools/llvm-profgen/ProfiledBinary.cpp +++ b/llvm/tools/llvm-profgen/ProfiledBinary.cpp @@ -606,13 +606,13 @@ bool ProfiledBinary::dissassembleSymbol(std::size_t SI, ArrayRef Bytes, // Record potential call targets for tail frame inference later-on. if (InferMissingFrames && FRange) { uint64_t Target = 0; - MIA->evaluateBranch(Inst, Address, Size, Target); + bool Err = MIA->evaluateBranch(Inst, Address, Size, Target); if (MCDesc.isCall()) { // Indirect call targets are unknown at this point. Recording the // unknown target (zero) for further LBR-based refinement. MissingContextInferrer->CallEdges[Address].insert(Target); } else if (MCDesc.isUnconditionalBranch()) { - assert(Target && + assert(Err && "target should be known for unconditional direct branch"); // Any inter-function unconditional jump is considered tail call at // this point. This is not 100% accurate and could further be From a19064d73c04e68757b2cf1323c78b40b649f75f Mon Sep 17 00:00:00 2001 From: h2h Date: Thu, 16 Oct 2025 22:36:53 -0700 Subject: [PATCH 04/11] Making the API compatible with non-ELF binaries --- llvm/include/llvm/Object/ObjectFile.h | 5 +++++ llvm/tools/llvm-profgen/ProfiledBinary.cpp | 8 ++++---- llvm/tools/llvm-profgen/ProfiledBinary.h | 2 +- 3 files changed, 10 insertions(+), 5 deletions(-) diff --git a/llvm/include/llvm/Object/ObjectFile.h b/llvm/include/llvm/Object/ObjectFile.h index 289cc770e3466..6ceedd2d310f7 100644 --- a/llvm/include/llvm/Object/ObjectFile.h +++ b/llvm/include/llvm/Object/ObjectFile.h @@ -198,6 +198,7 @@ class SymbolRef : public BasicSymbolRef { /// Get the alignment of this symbol as the actual value (not log 2). uint32_t getAlignment() const; uint64_t getCommonSize() const; + uint64_t getSize() const; Expected getType() const; /// Get section this symbol is defined in reference to. Result is @@ -482,6 +483,10 @@ inline uint64_t SymbolRef::getCommonSize() const { return getObject()->getCommonSymbolSize(getRawDataRefImpl()); } +inline uint64_t SymbolRef::getSize() const { + return getObject()->getCommonSymbolSizeImpl(getRawDataRefImpl()); +} + inline Expected SymbolRef::getSection() const { return getObject()->getSymbolSection(getRawDataRefImpl()); } diff --git a/llvm/tools/llvm-profgen/ProfiledBinary.cpp b/llvm/tools/llvm-profgen/ProfiledBinary.cpp index 2ceeba28f77a8..c9561aa9cfb3c 100644 --- a/llvm/tools/llvm-profgen/ProfiledBinary.cpp +++ b/llvm/tools/llvm-profgen/ProfiledBinary.cpp @@ -257,7 +257,7 @@ void ProfiledBinary::load() { if (ShowDisassemblyOnly) decodePseudoProbe(Obj); - populateSymbolsFromElf(Obj); + populateSymbolsFromBinary(Obj); // Disassemble the text sections. disassemble(Obj); @@ -822,10 +822,10 @@ void ProfiledBinary::populateSymbolAddressList(const ObjectFile *Obj) { } } -void ProfiledBinary::populateSymbolsFromElf(const ObjectFile *Obj) { - // Load binary functions from ELF symbol table when DWARF info is incomplete +void ProfiledBinary::populateSymbolsFromBinary(const ObjectFile *Obj) { + // Load binary functions from symbol table when Debug info is incomplete StringRef FileName = Obj->getFileName(); - for (const ELFSymbolRef Symbol : Obj->symbols()) { + for (const SymbolRef &Symbol : Obj->symbols()) { const SymbolRef::Type Type = unwrapOrError(Symbol.getType(), FileName); const uint64_t Addr = unwrapOrError(Symbol.getAddress(), FileName); const StringRef Name = unwrapOrError(Symbol.getName(), FileName); diff --git a/llvm/tools/llvm-profgen/ProfiledBinary.h b/llvm/tools/llvm-profgen/ProfiledBinary.h index 238c27fbc4c9f..e73ffd3143e3d 100644 --- a/llvm/tools/llvm-profgen/ProfiledBinary.h +++ b/llvm/tools/llvm-profgen/ProfiledBinary.h @@ -357,7 +357,7 @@ class ProfiledBinary { void populateSymbolAddressList(const object::ObjectFile *O); // Load functions from its symbol table (when DWARF info is missing). - void populateSymbolsFromElf(const object::ObjectFile *O); + void populateSymbolsFromBinary(const object::ObjectFile *O); // A function may be spilt into multiple non-continuous address ranges. We use // this to set whether start a function range is the real entry of the From e12e694c1b9e3563dd8351e225b7acec05e12d5a Mon Sep 17 00:00:00 2001 From: h2h Date: Mon, 20 Oct 2025 09:40:57 -0700 Subject: [PATCH 05/11] Fix --- llvm/include/llvm/ProfileData/SampleProf.h | 9 ++-- llvm/tools/llvm-profgen/ProfileGenerator.cpp | 43 ++++++++++---------- llvm/tools/llvm-profgen/ProfiledBinary.cpp | 4 +- 3 files changed, 28 insertions(+), 28 deletions(-) diff --git a/llvm/include/llvm/ProfileData/SampleProf.h b/llvm/include/llvm/ProfileData/SampleProf.h index dadf718d0b904..6de5884253017 100644 --- a/llvm/include/llvm/ProfileData/SampleProf.h +++ b/llvm/include/llvm/ProfileData/SampleProf.h @@ -1214,19 +1214,18 @@ class FunctionSamples { // Note the sequence of the suffixes in the knownSuffixes array matters. // If suffix "A" is appended after the suffix "B", "A" should be in front // of "B" in knownSuffixes. - SmallVector KnownSuffixes({LLVMSuffix, PartSuffix, UniqSuffix}); + const char *KnownSuffixes[] = {LLVMSuffix, PartSuffix, UniqSuffix, nullptr}; return getCanonicalFnName(FnName, KnownSuffixes, Attr); } - static StringRef getCanonicalFnName(StringRef FnName, - const SmallVector &Suffixes, + static StringRef getCanonicalFnName(StringRef FnName, const char *Suffixes[], StringRef Attr = "selected") { if (Attr == "" || Attr == "all") return FnName.split('.').first; if (Attr == "selected") { StringRef Cand(FnName); - for (const auto &Suf : Suffixes) { - StringRef Suffix(Suf); + for (const char **Suf = Suffixes; *Suf; Suf++) { + StringRef Suffix(*Suf); // If the profile contains ".__uniq." suffix, don't strip the // suffix for names in the IR. if (Suffix == UniqSuffix && FunctionSamples::HasUniqSuffix) diff --git a/llvm/tools/llvm-profgen/ProfileGenerator.cpp b/llvm/tools/llvm-profgen/ProfileGenerator.cpp index 0478d5568085a..2f6f50912fbcf 100644 --- a/llvm/tools/llvm-profgen/ProfileGenerator.cpp +++ b/llvm/tools/llvm-profgen/ProfileGenerator.cpp @@ -454,56 +454,57 @@ bool ProfileGeneratorBase::collectFunctionsFromRawProfile( for (const auto &CI : *SampleCounters) { if (const auto *CtxKey = dyn_cast(CI.first.getPtr())) { for (auto StackAddr : CtxKey->Context) { - uint64_t inc = Binary->addressIsCode(StackAddr) ? 1 : 0; - TotalStkAddr += inc; + uint64_t Inc = Binary->addressIsCode(StackAddr) ? 1 : 0; + TotalStkAddr += Inc; if (FuncRange *FRange = Binary->findFuncRange(StackAddr)) ProfiledFunctions.insert(FRange->Func); else - ErrStkAddr += inc; + ErrStkAddr += Inc; } } for (auto Item : CI.second.RangeCounter) { uint64_t StartAddress = Item.first.first; - uint64_t inc = Binary->addressIsCode(StartAddress) ? 1 : 0; - TotalFuncRange += inc; + uint64_t Inc = Binary->addressIsCode(StartAddress) ? Item.second : 0; + TotalFuncRange += Inc; if (FuncRange *FRange = Binary->findFuncRange(StartAddress)) ProfiledFunctions.insert(FRange->Func); else - ErrFuncRange += inc; + ErrFuncRange += Inc; } for (auto Item : CI.second.BranchCounter) { uint64_t SourceAddress = Item.first.first; uint64_t TargetAddress = Item.first.second; - uint64_t srcinc = Binary->addressIsCode(SourceAddress) ? 1 : 0; - uint64_t tgtinc = Binary->addressIsCode(TargetAddress) ? 1 : 0; - TotalSrc += srcinc; + uint64_t SrcInc = Binary->addressIsCode(SourceAddress) ? Item.second : 0; + uint64_t TgtInc = Binary->addressIsCode(TargetAddress) ? Item.second : 0; + TotalSrc += SrcInc; if (FuncRange *FRange = Binary->findFuncRange(SourceAddress)) ProfiledFunctions.insert(FRange->Func); else - ErrSrc += srcinc; - TotalTgt += tgtinc; + ErrSrc += SrcInc; + TotalTgt += TgtInc; if (FuncRange *FRange = Binary->findFuncRange(TargetAddress)) ProfiledFunctions.insert(FRange->Func); else - ErrTgt += tgtinc; + ErrTgt += TgtInc; } } if (ErrStkAddr) - WithColor::warning() << "Cannot find Stack Address from DWARF Info: " - << ErrStkAddr << "/" << TotalStkAddr << " missing\n"; + emitWarningSummary( + ErrStkAddr, TotalStkAddr, + "of stack address samples do not belong to any function"); if (ErrFuncRange) - WithColor::warning() << "Cannot find Function Range from DWARF Info: " - << ErrFuncRange << "/" << TotalFuncRange - << " missing\n"; + emitWarningSummary( + ErrFuncRange, TotalFuncRange, + "of function range samples do not belong to any function"); if (ErrSrc) - WithColor::warning() << "Cannot find LBR Source Addr from DWARF Info: " - << ErrSrc << "/" << TotalSrc << " missing\n"; + emitWarningSummary(ErrSrc, TotalSrc, + "of LBR source samples do not belong to any function"); if (ErrTgt) - WithColor::warning() << "Cannot find LBR Target Addr from DWARF Info: " - << ErrTgt << "/" << TotalTgt << " missing\n"; + emitWarningSummary(ErrTgt, TotalTgt, + "of LBR target samples do not belong to any function"); return true; } diff --git a/llvm/tools/llvm-profgen/ProfiledBinary.cpp b/llvm/tools/llvm-profgen/ProfiledBinary.cpp index c9561aa9cfb3c..1dab93fc871d2 100644 --- a/llvm/tools/llvm-profgen/ProfiledBinary.cpp +++ b/llvm/tools/llvm-profgen/ProfiledBinary.cpp @@ -834,8 +834,8 @@ void ProfiledBinary::populateSymbolsFromBinary(const ObjectFile *Obj) { if (Size == 0 || Type != SymbolRef::ST_Function) continue; - SmallVector Suffixes( - {".destroy", ".resume", ".llvm.", ".cold", ".warm"}); + const char *Suffixes[] = {".destroy", ".resume", ".llvm.", + ".cold", ".warm", nullptr}; const StringRef SymName = FunctionSamples::getCanonicalFnName(Name, Suffixes); From 5eead6b006e3a223b52b29078d449d3cf1a137d8 Mon Sep 17 00:00:00 2001 From: h2h Date: Wed, 22 Oct 2025 16:29:30 -0700 Subject: [PATCH 06/11] Clean up getSymbolSize API and warnings --- llvm/include/llvm/Object/ELFObjectFile.h | 6 ++++++ llvm/include/llvm/Object/ObjectFile.h | 3 ++- llvm/tools/llvm-profgen/PerfReader.cpp | 11 +---------- llvm/tools/llvm-profgen/ProfiledBinary.cpp | 4 ++-- 4 files changed, 11 insertions(+), 13 deletions(-) diff --git a/llvm/include/llvm/Object/ELFObjectFile.h b/llvm/include/llvm/Object/ELFObjectFile.h index ced1afdd4cc6a..cb7e6ef3458a9 100644 --- a/llvm/include/llvm/Object/ELFObjectFile.h +++ b/llvm/include/llvm/Object/ELFObjectFile.h @@ -310,6 +310,7 @@ template class ELFObjectFile : public ELFObjectFileBase { uint64_t getSymbolValueImpl(DataRefImpl Symb) const override; uint32_t getSymbolAlignment(DataRefImpl Symb) const override; uint64_t getCommonSymbolSizeImpl(DataRefImpl Symb) const override; + uint64_t getSymbolSizeImpl(DataRefImpl Symb) const override; Expected getSymbolFlags(DataRefImpl Symb) const override; uint8_t getSymbolBinding(DataRefImpl Symb) const override; uint8_t getSymbolOther(DataRefImpl Symb) const override; @@ -703,6 +704,11 @@ uint64_t ELFObjectFile::getCommonSymbolSizeImpl(DataRefImpl Symb) const { return getSymbolSize(Symb); } +template +uint64_t ELFObjectFile::getSymbolSizeImpl(DataRefImpl Symb) const { + return getSymbolSize(Symb); +} + template uint8_t ELFObjectFile::getSymbolBinding(DataRefImpl Symb) const { Expected SymOrErr = getSymbol(Symb); diff --git a/llvm/include/llvm/Object/ObjectFile.h b/llvm/include/llvm/Object/ObjectFile.h index 6ceedd2d310f7..bea61cf7c2214 100644 --- a/llvm/include/llvm/Object/ObjectFile.h +++ b/llvm/include/llvm/Object/ObjectFile.h @@ -256,6 +256,7 @@ class LLVM_ABI ObjectFile : public SymbolicFile { virtual uint64_t getSymbolValueImpl(DataRefImpl Symb) const = 0; virtual uint32_t getSymbolAlignment(DataRefImpl Symb) const; virtual uint64_t getCommonSymbolSizeImpl(DataRefImpl Symb) const = 0; + virtual uint64_t getSymbolSizeImpl(DataRefImpl Symb) const { return 0; } virtual Expected getSymbolType(DataRefImpl Symb) const = 0; virtual Expected getSymbolSection(DataRefImpl Symb) const = 0; @@ -484,7 +485,7 @@ inline uint64_t SymbolRef::getCommonSize() const { } inline uint64_t SymbolRef::getSize() const { - return getObject()->getCommonSymbolSizeImpl(getRawDataRefImpl()); + return getObject()->getSymbolSizeImpl(getRawDataRefImpl()); } inline Expected SymbolRef::getSection() const { diff --git a/llvm/tools/llvm-profgen/PerfReader.cpp b/llvm/tools/llvm-profgen/PerfReader.cpp index 183b248a72320..e9f7b666c95c7 100644 --- a/llvm/tools/llvm-profgen/PerfReader.cpp +++ b/llvm/tools/llvm-profgen/PerfReader.cpp @@ -1274,8 +1274,6 @@ void PerfScriptReader::warnInvalidRange() { const char *EndNotBoundaryMsg = "Range is not on instruction boundary, " "likely due to profile and binary mismatch."; - const char *DanglingRangeMsg = "Range does not belong to any functions, " - "likely from PLT, .init or .fini section."; const char *RangeCrossFuncMsg = "Fall through range should not cross function boundaries, likely due to " "profile and binary mismatch."; @@ -1283,7 +1281,6 @@ void PerfScriptReader::warnInvalidRange() { uint64_t TotalRangeNum = 0; uint64_t InstNotBoundary = 0; - uint64_t UnmatchedRange = 0; uint64_t RangeCrossFunc = 0; uint64_t BogusRange = 0; @@ -1303,11 +1300,8 @@ void PerfScriptReader::warnInvalidRange() { } auto *FRange = Binary->findFuncRange(StartAddress); - if (!FRange) { - UnmatchedRange += I.second; - WarnInvalidRange(StartAddress, EndAddress, DanglingRangeMsg); + if (!FRange) continue; - } if (EndAddress >= FRange->EndAddress) { RangeCrossFunc += I.second; @@ -1325,9 +1319,6 @@ void PerfScriptReader::warnInvalidRange() { emitWarningSummary( InstNotBoundary, TotalRangeNum, "of samples are from ranges that are not on instruction boundary."); - emitWarningSummary( - UnmatchedRange, TotalRangeNum, - "of samples are from ranges that do not belong to any functions."); emitWarningSummary( RangeCrossFunc, TotalRangeNum, "of samples are from ranges that do cross function boundaries."); diff --git a/llvm/tools/llvm-profgen/ProfiledBinary.cpp b/llvm/tools/llvm-profgen/ProfiledBinary.cpp index 1dab93fc871d2..469d31d95c00a 100644 --- a/llvm/tools/llvm-profgen/ProfiledBinary.cpp +++ b/llvm/tools/llvm-profgen/ProfiledBinary.cpp @@ -824,6 +824,8 @@ void ProfiledBinary::populateSymbolAddressList(const ObjectFile *Obj) { void ProfiledBinary::populateSymbolsFromBinary(const ObjectFile *Obj) { // Load binary functions from symbol table when Debug info is incomplete + const char *Suffixes[] = {".destroy", ".resume", ".llvm.", + ".cold", ".warm", nullptr}; StringRef FileName = Obj->getFileName(); for (const SymbolRef &Symbol : Obj->symbols()) { const SymbolRef::Type Type = unwrapOrError(Symbol.getType(), FileName); @@ -834,8 +836,6 @@ void ProfiledBinary::populateSymbolsFromBinary(const ObjectFile *Obj) { if (Size == 0 || Type != SymbolRef::ST_Function) continue; - const char *Suffixes[] = {".destroy", ".resume", ".llvm.", - ".cold", ".warm", nullptr}; const StringRef SymName = FunctionSamples::getCanonicalFnName(Name, Suffixes); From 8f59bfa035070c3cf638f696730156d26a5165fe Mon Sep 17 00:00:00 2001 From: h2h Date: Wed, 22 Oct 2025 20:12:53 -0700 Subject: [PATCH 07/11] Add cmdline option --load-function-from-symbol --- llvm/tools/llvm-profgen/ProfiledBinary.cpp | 10 +++++++++- 1 file changed, 9 insertions(+), 1 deletion(-) diff --git a/llvm/tools/llvm-profgen/ProfiledBinary.cpp b/llvm/tools/llvm-profgen/ProfiledBinary.cpp index 469d31d95c00a..4a88c2becf133 100644 --- a/llvm/tools/llvm-profgen/ProfiledBinary.cpp +++ b/llvm/tools/llvm-profgen/ProfiledBinary.cpp @@ -65,6 +65,13 @@ static cl::list DisassembleFunctions( "names only. Only work with show-disassembly-only"), cl::cat(ProfGenCategory)); +static cl::opt + LoadFunctionFromSymbol("load-function-from-symbol", + cl::desc("Gather additional binary function info " + "from symbols (e.g. .symtab) in case " + "dwarf info is incomplete."), + cl::cat(ProfGenCategory)); + static cl::opt KernelBinary("kernel", cl::desc("Generate the profile for Linux kernel binary."), @@ -257,7 +264,8 @@ void ProfiledBinary::load() { if (ShowDisassemblyOnly) decodePseudoProbe(Obj); - populateSymbolsFromBinary(Obj); + if (LoadFunctionFromSymbol || UsePseudoProbes) + populateSymbolsFromBinary(Obj); // Disassemble the text sections. disassemble(Obj); From a967994990b9bdd9dcd35c9739d1a8442c694d0a Mon Sep 17 00:00:00 2001 From: h2h Date: Thu, 23 Oct 2025 10:25:29 -0700 Subject: [PATCH 08/11] Get symbol size only for ELFObjectFile --- llvm/include/llvm/Object/ELFObjectFile.h | 6 ------ llvm/include/llvm/Object/ObjectFile.h | 6 ------ llvm/tools/llvm-profgen/ProfiledBinary.cpp | 6 +++++- 3 files changed, 5 insertions(+), 13 deletions(-) diff --git a/llvm/include/llvm/Object/ELFObjectFile.h b/llvm/include/llvm/Object/ELFObjectFile.h index cb7e6ef3458a9..ced1afdd4cc6a 100644 --- a/llvm/include/llvm/Object/ELFObjectFile.h +++ b/llvm/include/llvm/Object/ELFObjectFile.h @@ -310,7 +310,6 @@ template class ELFObjectFile : public ELFObjectFileBase { uint64_t getSymbolValueImpl(DataRefImpl Symb) const override; uint32_t getSymbolAlignment(DataRefImpl Symb) const override; uint64_t getCommonSymbolSizeImpl(DataRefImpl Symb) const override; - uint64_t getSymbolSizeImpl(DataRefImpl Symb) const override; Expected getSymbolFlags(DataRefImpl Symb) const override; uint8_t getSymbolBinding(DataRefImpl Symb) const override; uint8_t getSymbolOther(DataRefImpl Symb) const override; @@ -704,11 +703,6 @@ uint64_t ELFObjectFile::getCommonSymbolSizeImpl(DataRefImpl Symb) const { return getSymbolSize(Symb); } -template -uint64_t ELFObjectFile::getSymbolSizeImpl(DataRefImpl Symb) const { - return getSymbolSize(Symb); -} - template uint8_t ELFObjectFile::getSymbolBinding(DataRefImpl Symb) const { Expected SymOrErr = getSymbol(Symb); diff --git a/llvm/include/llvm/Object/ObjectFile.h b/llvm/include/llvm/Object/ObjectFile.h index bea61cf7c2214..289cc770e3466 100644 --- a/llvm/include/llvm/Object/ObjectFile.h +++ b/llvm/include/llvm/Object/ObjectFile.h @@ -198,7 +198,6 @@ class SymbolRef : public BasicSymbolRef { /// Get the alignment of this symbol as the actual value (not log 2). uint32_t getAlignment() const; uint64_t getCommonSize() const; - uint64_t getSize() const; Expected getType() const; /// Get section this symbol is defined in reference to. Result is @@ -256,7 +255,6 @@ class LLVM_ABI ObjectFile : public SymbolicFile { virtual uint64_t getSymbolValueImpl(DataRefImpl Symb) const = 0; virtual uint32_t getSymbolAlignment(DataRefImpl Symb) const; virtual uint64_t getCommonSymbolSizeImpl(DataRefImpl Symb) const = 0; - virtual uint64_t getSymbolSizeImpl(DataRefImpl Symb) const { return 0; } virtual Expected getSymbolType(DataRefImpl Symb) const = 0; virtual Expected getSymbolSection(DataRefImpl Symb) const = 0; @@ -484,10 +482,6 @@ inline uint64_t SymbolRef::getCommonSize() const { return getObject()->getCommonSymbolSize(getRawDataRefImpl()); } -inline uint64_t SymbolRef::getSize() const { - return getObject()->getSymbolSizeImpl(getRawDataRefImpl()); -} - inline Expected SymbolRef::getSection() const { return getObject()->getSymbolSection(getRawDataRefImpl()); } diff --git a/llvm/tools/llvm-profgen/ProfiledBinary.cpp b/llvm/tools/llvm-profgen/ProfiledBinary.cpp index 4a88c2becf133..93f605a891afc 100644 --- a/llvm/tools/llvm-profgen/ProfiledBinary.cpp +++ b/llvm/tools/llvm-profgen/ProfiledBinary.cpp @@ -839,7 +839,11 @@ void ProfiledBinary::populateSymbolsFromBinary(const ObjectFile *Obj) { const SymbolRef::Type Type = unwrapOrError(Symbol.getType(), FileName); const uint64_t Addr = unwrapOrError(Symbol.getAddress(), FileName); const StringRef Name = unwrapOrError(Symbol.getName(), FileName); - const uint64_t Size = Symbol.getSize(); + uint64_t Size = 0; + if (isa(Symbol.getObject())) { + ELFSymbolRef ElfSymbol(Symbol); + Size = ElfSymbol.getSize(); + } if (Size == 0 || Type != SymbolRef::ST_Function) continue; From 0dc2c669f3639fb91545288ba76647deec172978 Mon Sep 17 00:00:00 2001 From: h2h Date: Thu, 23 Oct 2025 14:49:54 -0700 Subject: [PATCH 09/11] Add unit test --- .../llvm-profgen/Inputs/missing-dwarf.exe | Bin 0 -> 18744 bytes .../tools/llvm-profgen/missing-dwarf.test | 40 ++++++++++++++++++ llvm/tools/llvm-profgen/ProfiledBinary.cpp | 8 ++-- 3 files changed, 45 insertions(+), 3 deletions(-) create mode 100755 llvm/test/tools/llvm-profgen/Inputs/missing-dwarf.exe create mode 100644 llvm/test/tools/llvm-profgen/missing-dwarf.test diff --git a/llvm/test/tools/llvm-profgen/Inputs/missing-dwarf.exe b/llvm/test/tools/llvm-profgen/Inputs/missing-dwarf.exe new file mode 100755 index 0000000000000000000000000000000000000000..c4b8af0bf1f2ab87e195231a326fc756a7b46f23 GIT binary patch literal 18744 zcmeHPYiu0V6+W|LI}Rc7CJ+d9h^OUIN2+IcZO5?7$+3{40H+Y_1=6&zAt3J;pnsoGCDsoc z!GKQqG_)YngqtBsNP0PzzYKW=EL)|WijsKSC?lj%CASL=fF-w(B=OdgA+w%rZT3`_ z5cYK9i6W_k+&`8RvI3TsJW-$K`2o%FGE%-QVV0FVijARr2F6}PWLTb&^_`WHc@!?< zO-bIAzJR7+7+BpFoP7`=YY*6*%_^;TI}!KDG!CQV@Y40zm|V2m}!bA`nC%h(Hj5|Az?txoyWs@e_Y& zjen{6?Fc~pkySTTdM$q9<I`Afk`PAO!JwsRw#jEa!pFH=@ z1Eo^wmF5T{%F$CRr)Jk0*VbVG*}wbv8b*{K(GA8?^oV{*$M}R#kN$(2`OX>i7F)&m zgpZyo$F3YLG6_-;fgl1w1cC?z5eOm>L?DPj5P={9K?H&b1QGb(i-1yo1Lw6(X;*Kd zcdztrEPTFHila18-c7~tmr6OpQLd1V>1t1a^1vZb7Q;&WmFrsR-88+&+m74npGu{h z$!26-n6M~rXAqOG>4gp6X2WAx#|D%zv?=^(Xn14G7&=0geAu`xVmal@<#&4J1O+LG zKoEf-0zm|V2m}!bA`nC%h(Hj5AOb-I{sR%fm%x->liySGx0&=UC#5Srfj5MP$ZeGR zt&(@0)cL#UcB%9C$)AvVe6JyL;vNulT>inuQkDu9RfB~0jW^10lR>KZmYU?#viu5C zFfDij#62hGw@H1glq}ZC%(K!T*LzIj>!oD=#gf;a$D}>JE*9xiLlKx*4 z{9Spy28<664{DwG_3}QLQ`L-5cE<8_+rrIkpzJTv^Z+i|vGD+RDsn$DMp3 zmCb0~-Fj4y&ZS(fbKZ4xg~3P!mHA>qPi7Y)>GW~$rz@Aw-s>b?oh+?^zTE=@y#oW$ z#6ZHf2kfNXvnx5UyJx_TS^d4e-O0XzUOQ^-grUQOy}4NTKpz}BJeV(LGAXQjE}8T< z6!sq;v@)4&B9*CIJbve3bg{?oj_&HSG+mDrmI}6$D@5>f?}(L1b!D=yqbFf{YOn{U zhX-RYJT};W3B$3;!LHaP%EyP$$J7LQ8y$>7Y8rp4+K|emT&z%3BlDh5sZ<%`N7`;G z*_C(F*(8&Em`C=3&%Lnit;v@z+>Hlki8@}L&1OIP#S_2pv%mV5hbhoR-+nU~0%f@^ z^bjynLA^4z|V&!!sWAc2b^Tls-hb&`R&xXf~YcAo3$B+>@melie zO!8YUVZ(k{*7L(n{rq!}+_))u=I3(O-XET!kZb@uu?iq|%v+JHywJ zYP_Xe`2auh-S~n}M&>)%X^ucb!pj#RsqJsJ6$a%7#%1iM$G1VOw4N8rWVJ zr(>x$spYGEb(so08}Z{j3-ROapdB-+LDbaps(+O6a zRQOaah=>X|G{9+8s!j^3P51(!8Uj|&o9~jLCh$y*6-9lN{{2FK<#n(UBt;h$bpV*DZPN3f>$X2@q7mHdhKttU%wT8 z+F(^PFjt-G`FS5r6sSEfJ`&X0^Km2Yph9n3e8>3>h;ON`PG|<=S{?j8f!Cf#cOxFg z^UHr8B@y2esvS>K(%yeQ9vA*=D}Ds=&7qCZ>Wj~lgyU*b9+&WHB+@q#--7+c*A$+o ze+%(2_Pgeb!?(#kv;n5%@xzy$tn{Nv{ATI@CF#e%zpt{rJWlw2c5e@ z4h|id7)CPQNtmN?SsEWX1m^hU?fZu&&D+PurblPYnW6oYqsZ{DKr}9Q-JxS!t_5Nr zX!lU@79pZiy&}=H3t4mC%Gk78apHEgvQrteSa58h2?_4H4W^wo#mw4r9dd&+Z8Mjm z)s2ZlLHZSw7SqJlkG!|EEVAxeM-eUny|A?4S_zbH-jnm3!#&w(!PIp#7&9dC_t zPTpOT(9r&gF6w%n%I2-YJm~gP2K{-`&3h&C2CPaku?+XcA_C~taiF`-BK}f;V_8;2 zPIu;I56|1kV9jHAUF|VAkM696R1)jTqA&c>b7{%`l(m=?_XiKPTe$vmML(kWbHgSm*itW0J4Qid#@twqL^V0A|RP`8@A`R`QQY zL$2Sy{|{q1`6d4%@w`dgYy_)5kN4jz1^7B1U`S^xr2vb87 z0w}(f$zT5uuy7M@@^CHBANl_b`0ZI|`4V#5D)@Zg6qo<^f$h*lB-UBJiYWCx*U$G? z{C^|(|3mQc!90%Zo5-d3F`w_l&PEW#q>10PALg^9`Eez`JTHxU96nFk1M^s(^YM8e zxlQu@{Y7xU)+A4$8En%{(0n*Pic-hFERfa2BhTrS$>FkJ^l}+!@X?SfZvMc zJ`-X2LCho2Gm*;Vw{4gFm|7O!+?cR>>dn5&ZqJPu4H1 zDU0PEFWXs0yo}Es+}42qtmOOe-2ME!8t{K2`Jexo`W?yl_kWZgIAOv&t*XTJv&=Q9 ze?;=bRqWcn{z8NLXC$BBE3g}`m*tlm)PM9c>;Far{(|J+#1%`)^)meZ2K*-_-+%q_ z^GPp{{p=LwcO-vR4qP`QMspP&H+Wp3`0%-p*>cL=;$=&|3VkZxbJpi)_jvr%K7vn$ Fe*(I&+5G?j literal 0 HcmV?d00001 diff --git a/llvm/test/tools/llvm-profgen/missing-dwarf.test b/llvm/test/tools/llvm-profgen/missing-dwarf.test new file mode 100644 index 0000000000000..0f0b8bc30de47 --- /dev/null +++ b/llvm/test/tools/llvm-profgen/missing-dwarf.test @@ -0,0 +1,40 @@ +; RUN: rm -rf %t +; RUN: mkdir -p %t +; RUN: cd %t + +; RUN: echo -e "1\n401120-40113b:1\n1\n40112f->401110:1" > %t.prof +; RUN: cp %S/Inputs/missing-dwarf.exe %t/missing-dwarf.exe + +; Test --load-function-from-symbol=0 +; RUN: llvm-profgen --format=text --unsymbolized-profile=%t.prof --binary=%t/missing-dwarf.exe --output=%t1 --fill-zero-for-all-funcs --show-detailed-warning --use-offset=0 --load-function-from-symbol=0 2>&1 | FileCheck %s --check-prefix=CHECK-NO-LOAD-SYMTAB + +; CHECK-NO-LOAD-SYMTAB: warning: 100.00%(1/1) of function range samples do not belong to any function +; CHECK-NO-LOAD-SYMTAB-NEXT: warning: 100.00%(1/1) of LBR source samples do not belong to any function +; CHECK-NO-LOAD-SYMTAB-NEXT: warning: 100.00%(1/1) of LBR target samples do not belong to any function + +; Test --load-function-from-symbol=1 +; RUN: llvm-profgen --format=text --unsymbolized-profile=%t.prof --binary=%t/missing-dwarf.exe --output=%t2 --fill-zero-for-all-funcs --show-detailed-warning --use-offset=0 --load-function-from-symbol=1 +; RUN: FileCheck %s --input-file %t2 --check-prefix=CHECK-LOAD-SYMTAB + +; CHECK-LOAD-SYMTAB: main:2:1 +; CHECK-LOAD-SYMTAB-NEXT: 1: 1 +; CHECK-LOAD-SYMTAB-NEXT: 2: 1 foo:1 +; CHECK-LOAD-SYMTAB-NEXT: !CFGChecksum: 281479271677951 +; CHECK-LOAD-SYMTAB-NEXT: foo:0:0 +; CHECK-LOAD-SYMTAB-NEXT: 1: 0 +; CHECK-LOAD-SYMTAB-NEXT: !CFGChecksum: 4294967295 + +; Build instructions: +; missing-dwarf.o: clang -gsplit-dwarf=split -fdebug-compilation-dir=. test.c -fdebug-info-for-profiling -fpseudo-probe-for-profiling -O0 -g -o missing-dwarf.o -c +; missing-dwarf.exe: clang -fdebug-compilation-dir=. missing-dwarf.o -o missing-dwarf.exe -fdebug-info-for-profiling -fpseudo-probe-for-profiling -O0 -g + +; Source code: + +int foo() { + return 1; +} + +int main() { + foo(); + return 0; +} diff --git a/llvm/tools/llvm-profgen/ProfiledBinary.cpp b/llvm/tools/llvm-profgen/ProfiledBinary.cpp index 93f605a891afc..f601fc133df1c 100644 --- a/llvm/tools/llvm-profgen/ProfiledBinary.cpp +++ b/llvm/tools/llvm-profgen/ProfiledBinary.cpp @@ -66,7 +66,7 @@ static cl::list DisassembleFunctions( cl::cat(ProfGenCategory)); static cl::opt - LoadFunctionFromSymbol("load-function-from-symbol", + LoadFunctionFromSymbol("load-function-from-symbol", cl::init(true), cl::desc("Gather additional binary function info " "from symbols (e.g. .symtab) in case " "dwarf info is incomplete."), @@ -264,7 +264,7 @@ void ProfiledBinary::load() { if (ShowDisassemblyOnly) decodePseudoProbe(Obj); - if (LoadFunctionFromSymbol || UsePseudoProbes) + if (LoadFunctionFromSymbol && UsePseudoProbes) populateSymbolsFromBinary(Obj); // Disassemble the text sections. @@ -853,8 +853,10 @@ void ProfiledBinary::populateSymbolsFromBinary(const ObjectFile *Obj) { auto Ret = BinaryFunctions.emplace(SymName, BinaryFunction()); auto &Func = Ret.first->second; - if (Ret.second) + if (Ret.second) { Func.FuncName = Ret.first->first; + HashBinaryFunctions[MD5Hash(StringRef(SymName))] = &Func; + } if (auto Range = findFuncRange(Addr)) { if (Ret.second && ShowDetailedWarning) From 75dc996056424a43aa3b68be63f14cdf106ba340 Mon Sep 17 00:00:00 2001 From: h2h Date: Fri, 24 Oct 2025 16:16:40 -0700 Subject: [PATCH 10/11] Nit --- llvm/include/llvm/ProfileData/SampleProf.h | 9 +++++---- llvm/test/tools/llvm-profgen/missing-dwarf.test | 5 ++--- llvm/tools/llvm-profgen/ProfiledBinary.cpp | 4 ++-- 3 files changed, 9 insertions(+), 9 deletions(-) diff --git a/llvm/include/llvm/ProfileData/SampleProf.h b/llvm/include/llvm/ProfileData/SampleProf.h index 6de5884253017..dd0495f29e8c3 100644 --- a/llvm/include/llvm/ProfileData/SampleProf.h +++ b/llvm/include/llvm/ProfileData/SampleProf.h @@ -1214,18 +1214,19 @@ class FunctionSamples { // Note the sequence of the suffixes in the knownSuffixes array matters. // If suffix "A" is appended after the suffix "B", "A" should be in front // of "B" in knownSuffixes. - const char *KnownSuffixes[] = {LLVMSuffix, PartSuffix, UniqSuffix, nullptr}; + const SmallVector KnownSuffixes{LLVMSuffix, PartSuffix, + UniqSuffix}; return getCanonicalFnName(FnName, KnownSuffixes, Attr); } - static StringRef getCanonicalFnName(StringRef FnName, const char *Suffixes[], + static StringRef getCanonicalFnName(StringRef FnName, + ArrayRef Suffixes, StringRef Attr = "selected") { if (Attr == "" || Attr == "all") return FnName.split('.').first; if (Attr == "selected") { StringRef Cand(FnName); - for (const char **Suf = Suffixes; *Suf; Suf++) { - StringRef Suffix(*Suf); + for (const auto Suffix : Suffixes) { // If the profile contains ".__uniq." suffix, don't strip the // suffix for names in the IR. if (Suffix == UniqSuffix && FunctionSamples::HasUniqSuffix) diff --git a/llvm/test/tools/llvm-profgen/missing-dwarf.test b/llvm/test/tools/llvm-profgen/missing-dwarf.test index 0f0b8bc30de47..0fc0d660133f1 100644 --- a/llvm/test/tools/llvm-profgen/missing-dwarf.test +++ b/llvm/test/tools/llvm-profgen/missing-dwarf.test @@ -3,17 +3,16 @@ ; RUN: cd %t ; RUN: echo -e "1\n401120-40113b:1\n1\n40112f->401110:1" > %t.prof -; RUN: cp %S/Inputs/missing-dwarf.exe %t/missing-dwarf.exe ; Test --load-function-from-symbol=0 -; RUN: llvm-profgen --format=text --unsymbolized-profile=%t.prof --binary=%t/missing-dwarf.exe --output=%t1 --fill-zero-for-all-funcs --show-detailed-warning --use-offset=0 --load-function-from-symbol=0 2>&1 | FileCheck %s --check-prefix=CHECK-NO-LOAD-SYMTAB +; RUN: llvm-profgen --format=text --unsymbolized-profile=%t.prof --binary=%S/Inputs/missing-dwarf.exe --output=%t1 --fill-zero-for-all-funcs --show-detailed-warning --use-offset=0 --load-function-from-symbol=0 2>&1 | FileCheck %s --check-prefix=CHECK-NO-LOAD-SYMTAB ; CHECK-NO-LOAD-SYMTAB: warning: 100.00%(1/1) of function range samples do not belong to any function ; CHECK-NO-LOAD-SYMTAB-NEXT: warning: 100.00%(1/1) of LBR source samples do not belong to any function ; CHECK-NO-LOAD-SYMTAB-NEXT: warning: 100.00%(1/1) of LBR target samples do not belong to any function ; Test --load-function-from-symbol=1 -; RUN: llvm-profgen --format=text --unsymbolized-profile=%t.prof --binary=%t/missing-dwarf.exe --output=%t2 --fill-zero-for-all-funcs --show-detailed-warning --use-offset=0 --load-function-from-symbol=1 +; RUN: llvm-profgen --format=text --unsymbolized-profile=%t.prof --binary=%S/Inputs/missing-dwarf.exe --output=%t2 --fill-zero-for-all-funcs --show-detailed-warning --use-offset=0 --load-function-from-symbol=1 ; RUN: FileCheck %s --input-file %t2 --check-prefix=CHECK-LOAD-SYMTAB ; CHECK-LOAD-SYMTAB: main:2:1 diff --git a/llvm/tools/llvm-profgen/ProfiledBinary.cpp b/llvm/tools/llvm-profgen/ProfiledBinary.cpp index f601fc133df1c..c1c0ea5fe6b3e 100644 --- a/llvm/tools/llvm-profgen/ProfiledBinary.cpp +++ b/llvm/tools/llvm-profgen/ProfiledBinary.cpp @@ -832,8 +832,8 @@ void ProfiledBinary::populateSymbolAddressList(const ObjectFile *Obj) { void ProfiledBinary::populateSymbolsFromBinary(const ObjectFile *Obj) { // Load binary functions from symbol table when Debug info is incomplete - const char *Suffixes[] = {".destroy", ".resume", ".llvm.", - ".cold", ".warm", nullptr}; + const SmallVector Suffixes( + {".destroy", ".resume", ".llvm.", ".cold", ".warm"}); StringRef FileName = Obj->getFileName(); for (const SymbolRef &Symbol : Obj->symbols()) { const SymbolRef::Type Type = unwrapOrError(Symbol.getType(), FileName); From 5600e83a5f36ce7fe754f7a71a764d0cf35af15f Mon Sep 17 00:00:00 2001 From: h2h Date: Wed, 29 Oct 2025 17:21:49 -0700 Subject: [PATCH 11/11] Cleanup loggings and comments --- .../tools/llvm-profgen/missing-dwarf.test | 4 +- llvm/tools/llvm-profgen/PerfReader.cpp | 18 +++++++- llvm/tools/llvm-profgen/ProfileGenerator.cpp | 33 --------------- llvm/tools/llvm-profgen/ProfiledBinary.cpp | 41 ++++++++++++------- llvm/tools/llvm-profgen/ProfiledBinary.h | 1 + 5 files changed, 45 insertions(+), 52 deletions(-) diff --git a/llvm/test/tools/llvm-profgen/missing-dwarf.test b/llvm/test/tools/llvm-profgen/missing-dwarf.test index 0fc0d660133f1..b96ae9018dae1 100644 --- a/llvm/test/tools/llvm-profgen/missing-dwarf.test +++ b/llvm/test/tools/llvm-profgen/missing-dwarf.test @@ -7,9 +7,7 @@ ; Test --load-function-from-symbol=0 ; RUN: llvm-profgen --format=text --unsymbolized-profile=%t.prof --binary=%S/Inputs/missing-dwarf.exe --output=%t1 --fill-zero-for-all-funcs --show-detailed-warning --use-offset=0 --load-function-from-symbol=0 2>&1 | FileCheck %s --check-prefix=CHECK-NO-LOAD-SYMTAB -; CHECK-NO-LOAD-SYMTAB: warning: 100.00%(1/1) of function range samples do not belong to any function -; CHECK-NO-LOAD-SYMTAB-NEXT: warning: 100.00%(1/1) of LBR source samples do not belong to any function -; CHECK-NO-LOAD-SYMTAB-NEXT: warning: 100.00%(1/1) of LBR target samples do not belong to any function +; CHECK-NO-LOAD-SYMTAB: warning: Loading of DWARF info completed, but no binary functions have been retrieved. ; Test --load-function-from-symbol=1 ; RUN: llvm-profgen --format=text --unsymbolized-profile=%t.prof --binary=%S/Inputs/missing-dwarf.exe --output=%t2 --fill-zero-for-all-funcs --show-detailed-warning --use-offset=0 --load-function-from-symbol=1 diff --git a/llvm/tools/llvm-profgen/PerfReader.cpp b/llvm/tools/llvm-profgen/PerfReader.cpp index e9f7b666c95c7..1c4b79554e1a5 100644 --- a/llvm/tools/llvm-profgen/PerfReader.cpp +++ b/llvm/tools/llvm-profgen/PerfReader.cpp @@ -1274,6 +1274,8 @@ void PerfScriptReader::warnInvalidRange() { const char *EndNotBoundaryMsg = "Range is not on instruction boundary, " "likely due to profile and binary mismatch."; + const char *DanglingRangeMsg = "Range does not belong to any functions, " + "likely from PLT, .init or .fini section."; const char *RangeCrossFuncMsg = "Fall through range should not cross function boundaries, likely due to " "profile and binary mismatch."; @@ -1281,6 +1283,8 @@ void PerfScriptReader::warnInvalidRange() { uint64_t TotalRangeNum = 0; uint64_t InstNotBoundary = 0; + uint64_t UnmatchedRange = 0; + uint64_t RecoveredRange = 0; uint64_t RangeCrossFunc = 0; uint64_t BogusRange = 0; @@ -1300,8 +1304,14 @@ void PerfScriptReader::warnInvalidRange() { } auto *FRange = Binary->findFuncRange(StartAddress); - if (!FRange) + if (!FRange) { + UnmatchedRange += I.second; + WarnInvalidRange(StartAddress, EndAddress, DanglingRangeMsg); continue; + } + + if (FRange->Func->FromSymtab) + RecoveredRange += I.second; if (EndAddress >= FRange->EndAddress) { RangeCrossFunc += I.second; @@ -1319,6 +1329,12 @@ void PerfScriptReader::warnInvalidRange() { emitWarningSummary( InstNotBoundary, TotalRangeNum, "of samples are from ranges that are not on instruction boundary."); + emitWarningSummary( + UnmatchedRange, TotalRangeNum, + "of samples are from ranges that do not belong to any functions."); + emitWarningSummary( + RecoveredRange, TotalRangeNum, + "of samples are from ranges that belong to functions recovered from symbol table."); emitWarningSummary( RangeCrossFunc, TotalRangeNum, "of samples are from ranges that do cross function boundaries."); diff --git a/llvm/tools/llvm-profgen/ProfileGenerator.cpp b/llvm/tools/llvm-profgen/ProfileGenerator.cpp index 2f6f50912fbcf..3b875c5de3c09 100644 --- a/llvm/tools/llvm-profgen/ProfileGenerator.cpp +++ b/llvm/tools/llvm-profgen/ProfileGenerator.cpp @@ -449,62 +449,29 @@ bool ProfileGeneratorBase::collectFunctionsFromRawProfile( // Go through all the stacks, ranges and branches in sample counters, use // the start of the range to look up the function it belongs and record the // function. - uint64_t ErrStkAddr = 0, ErrFuncRange = 0, ErrSrc = 0, ErrTgt = 0; - uint64_t TotalStkAddr = 0, TotalFuncRange = 0, TotalSrc = 0, TotalTgt = 0; for (const auto &CI : *SampleCounters) { if (const auto *CtxKey = dyn_cast(CI.first.getPtr())) { for (auto StackAddr : CtxKey->Context) { - uint64_t Inc = Binary->addressIsCode(StackAddr) ? 1 : 0; - TotalStkAddr += Inc; if (FuncRange *FRange = Binary->findFuncRange(StackAddr)) ProfiledFunctions.insert(FRange->Func); - else - ErrStkAddr += Inc; } } for (auto Item : CI.second.RangeCounter) { uint64_t StartAddress = Item.first.first; - uint64_t Inc = Binary->addressIsCode(StartAddress) ? Item.second : 0; - TotalFuncRange += Inc; if (FuncRange *FRange = Binary->findFuncRange(StartAddress)) ProfiledFunctions.insert(FRange->Func); - else - ErrFuncRange += Inc; } for (auto Item : CI.second.BranchCounter) { uint64_t SourceAddress = Item.first.first; uint64_t TargetAddress = Item.first.second; - uint64_t SrcInc = Binary->addressIsCode(SourceAddress) ? Item.second : 0; - uint64_t TgtInc = Binary->addressIsCode(TargetAddress) ? Item.second : 0; - TotalSrc += SrcInc; if (FuncRange *FRange = Binary->findFuncRange(SourceAddress)) ProfiledFunctions.insert(FRange->Func); - else - ErrSrc += SrcInc; - TotalTgt += TgtInc; if (FuncRange *FRange = Binary->findFuncRange(TargetAddress)) ProfiledFunctions.insert(FRange->Func); - else - ErrTgt += TgtInc; } } - - if (ErrStkAddr) - emitWarningSummary( - ErrStkAddr, TotalStkAddr, - "of stack address samples do not belong to any function"); - if (ErrFuncRange) - emitWarningSummary( - ErrFuncRange, TotalFuncRange, - "of function range samples do not belong to any function"); - if (ErrSrc) - emitWarningSummary(ErrSrc, TotalSrc, - "of LBR source samples do not belong to any function"); - if (ErrTgt) - emitWarningSummary(ErrTgt, TotalTgt, - "of LBR target samples do not belong to any function"); return true; } diff --git a/llvm/tools/llvm-profgen/ProfiledBinary.cpp b/llvm/tools/llvm-profgen/ProfiledBinary.cpp index c1c0ea5fe6b3e..e65bafa9ac4ca 100644 --- a/llvm/tools/llvm-profgen/ProfiledBinary.cpp +++ b/llvm/tools/llvm-profgen/ProfiledBinary.cpp @@ -831,13 +831,21 @@ void ProfiledBinary::populateSymbolAddressList(const ObjectFile *Obj) { } void ProfiledBinary::populateSymbolsFromBinary(const ObjectFile *Obj) { - // Load binary functions from symbol table when Debug info is incomplete - const SmallVector Suffixes( - {".destroy", ".resume", ".llvm.", ".cold", ".warm"}); + // Load binary functions from symbol table when Debug info is incomplete. + // Strip the internal suffixes which are not reflected in the DWARF info. + const SmallVector Suffixes( + { + // Internal suffixes from CoroSplit pass + ".cleanup", ".destroy", ".resume", + // Internal suffixes from Bolt + ".cold", ".warm", + // Compiler internal + ".llvm.", + }); StringRef FileName = Obj->getFileName(); for (const SymbolRef &Symbol : Obj->symbols()) { const SymbolRef::Type Type = unwrapOrError(Symbol.getType(), FileName); - const uint64_t Addr = unwrapOrError(Symbol.getAddress(), FileName); + const uint64_t StartAddr = unwrapOrError(Symbol.getAddress(), FileName); const StringRef Name = unwrapOrError(Symbol.getName(), FileName); uint64_t Size = 0; if (isa(Symbol.getObject())) { @@ -855,25 +863,26 @@ void ProfiledBinary::populateSymbolsFromBinary(const ObjectFile *Obj) { auto &Func = Ret.first->second; if (Ret.second) { Func.FuncName = Ret.first->first; + Func.FromSymtab = true; HashBinaryFunctions[MD5Hash(StringRef(SymName))] = &Func; } - if (auto Range = findFuncRange(Addr)) { - if (Ret.second && ShowDetailedWarning) + if (auto Range = findFuncRange(StartAddr)) { + if (Ret.second && Range->getFuncName() != SymName && ShowDetailedWarning) WithColor::warning() - << "Symbol " << Name << " start address " - << format("%8" PRIx64, Addr) << " already exists in DWARF at " - << format("%8" PRIx64, Range->StartAddress) << " in function " - << Range->getFuncName() << "\n"; + << "Conflicting symbol " << Name << " already exists in DWARF as " + << Range->getFuncName() << " at address " << format("%8" PRIx64, StartAddr) + << ". The DWARF indicates a range from " << format("%8" PRIx64, Range->StartAddress) << " to " + << format("%8" PRIx64, Range->EndAddress) << "\n"; } else { // Store/Update Function Range from SymTab - Func.Ranges.emplace_back(Addr, Addr + Size); + Func.Ranges.emplace_back(StartAddr, StartAddr + Size); - auto R = StartAddrToFuncRangeMap.emplace(Addr, FuncRange()); + auto R = StartAddrToFuncRangeMap.emplace(StartAddr, FuncRange()); FuncRange &FRange = R.first->second; FRange.Func = &Func; - FRange.StartAddress = Addr; - FRange.EndAddress = Addr + Size; + FRange.StartAddress = StartAddr; + FRange.EndAddress = StartAddr + Size; } } } @@ -902,8 +911,10 @@ void ProfiledBinary::loadSymbolsFromDWARFUnit(DWARFUnit &CompilationUnit) { // BinaryFunction indexed by the name. auto Ret = BinaryFunctions.emplace(Name, BinaryFunction()); auto &Func = Ret.first->second; - if (Ret.second) + if (Ret.second) { Func.FuncName = Ret.first->first; + Func.FromSymtab = false; + } for (const auto &Range : Ranges) { uint64_t StartAddress = Range.LowPC; diff --git a/llvm/tools/llvm-profgen/ProfiledBinary.h b/llvm/tools/llvm-profgen/ProfiledBinary.h index e73ffd3143e3d..753fcc935716b 100644 --- a/llvm/tools/llvm-profgen/ProfiledBinary.h +++ b/llvm/tools/llvm-profgen/ProfiledBinary.h @@ -76,6 +76,7 @@ struct BinaryFunction { StringRef FuncName; // End of range is an exclusive bound. RangesTy Ranges; + bool FromSymtab; uint64_t getFuncSize() { uint64_t Sum = 0;