-
Notifications
You must be signed in to change notification settings - Fork 15k
[llvm-profgen] Loading binary functions from .symtab when DWARF info is incomplete #163654
New issue
Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.
By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.
Already on GitHub? Sign in to your account
base: main
Are you sure you want to change the base?
Changes from 6 commits
f34ab2d
a1f32b5
0fd352d
c097d37
a19064d
e12e694
5eead6b
8f59bfa
a967994
0dc2c66
75dc996
5600e83
File filter
Filter by extension
Conversations
Jump to
Diff view
Diff view
There are no files selected for viewing
| Original file line number | Diff line number | Diff line change | ||
|---|---|---|---|---|
|
|
@@ -1214,13 +1214,18 @@ class FunctionSamples { | |||
| // Note the sequence of the suffixes in the knownSuffixes array matters. | ||||
| // If suffix "A" is appended after the suffix "B", "A" should be in front | ||||
| // of "B" in knownSuffixes. | ||||
| const char *KnownSuffixes[] = {LLVMSuffix, PartSuffix, UniqSuffix}; | ||||
| const char *KnownSuffixes[] = {LLVMSuffix, PartSuffix, UniqSuffix, nullptr}; | ||||
|
||||
| ArrayRef(const iterator_range<U *> &Range) |
getCanonicalFnName(FnName, KnownSuffixes, Attr);
static StringRef getCanonicalFnName(StringRef FnName, ArrayRef<StringRef> Suffixes,
StringRef Attr = "selected") {
| Original file line number | Diff line number | Diff line change |
|---|---|---|
|
|
@@ -449,29 +449,62 @@ bool ProfileGeneratorBase::collectFunctionsFromRawProfile( | |
| // Go through all the stacks, ranges and branches in sample counters, use | ||
| // the start of the range to look up the function it belongs and record the | ||
| // function. | ||
| uint64_t ErrStkAddr = 0, ErrFuncRange = 0, ErrSrc = 0, ErrTgt = 0; | ||
| uint64_t TotalStkAddr = 0, TotalFuncRange = 0, TotalSrc = 0, TotalTgt = 0; | ||
| for (const auto &CI : *SampleCounters) { | ||
| if (const auto *CtxKey = dyn_cast<AddrBasedCtxKey>(CI.first.getPtr())) { | ||
| for (auto StackAddr : CtxKey->Context) { | ||
| uint64_t Inc = Binary->addressIsCode(StackAddr) ? 1 : 0; | ||
| TotalStkAddr += Inc; | ||
|
||
| if (FuncRange *FRange = Binary->findFuncRange(StackAddr)) | ||
| ProfiledFunctions.insert(FRange->Func); | ||
| else | ||
| ErrStkAddr += Inc; | ||
| } | ||
| } | ||
|
|
||
| for (auto Item : CI.second.RangeCounter) { | ||
| uint64_t StartAddress = Item.first.first; | ||
| uint64_t Inc = Binary->addressIsCode(StartAddress) ? Item.second : 0; | ||
| TotalFuncRange += Inc; | ||
| if (FuncRange *FRange = Binary->findFuncRange(StartAddress)) | ||
| ProfiledFunctions.insert(FRange->Func); | ||
| else | ||
| ErrFuncRange += Inc; | ||
| } | ||
|
|
||
| for (auto Item : CI.second.BranchCounter) { | ||
| uint64_t SourceAddress = Item.first.first; | ||
| uint64_t TargetAddress = Item.first.second; | ||
| uint64_t SrcInc = Binary->addressIsCode(SourceAddress) ? Item.second : 0; | ||
| uint64_t TgtInc = Binary->addressIsCode(TargetAddress) ? Item.second : 0; | ||
| TotalSrc += SrcInc; | ||
| if (FuncRange *FRange = Binary->findFuncRange(SourceAddress)) | ||
| ProfiledFunctions.insert(FRange->Func); | ||
| else | ||
| ErrSrc += SrcInc; | ||
| TotalTgt += TgtInc; | ||
| if (FuncRange *FRange = Binary->findFuncRange(TargetAddress)) | ||
| ProfiledFunctions.insert(FRange->Func); | ||
| else | ||
| ErrTgt += TgtInc; | ||
| } | ||
| } | ||
|
|
||
| if (ErrStkAddr) | ||
|
||
| emitWarningSummary( | ||
| ErrStkAddr, TotalStkAddr, | ||
| "of stack address samples do not belong to any function"); | ||
| if (ErrFuncRange) | ||
| emitWarningSummary( | ||
| ErrFuncRange, TotalFuncRange, | ||
| "of function range samples do not belong to any function"); | ||
|
||
| if (ErrSrc) | ||
| emitWarningSummary(ErrSrc, TotalSrc, | ||
| "of LBR source samples do not belong to any function"); | ||
| if (ErrTgt) | ||
| emitWarningSummary(ErrTgt, TotalTgt, | ||
| "of LBR target samples do not belong to any function"); | ||
|
||
| return true; | ||
| } | ||
|
|
||
|
|
||
| Original file line number | Diff line number | Diff line change | ||
|---|---|---|---|---|
|
|
@@ -257,6 +257,8 @@ void ProfiledBinary::load() { | |||
| if (ShowDisassemblyOnly) | ||||
| decodePseudoProbe(Obj); | ||||
|
|
||||
| populateSymbolsFromBinary(Obj); | ||||
|
|
||||
| // Disassemble the text sections. | ||||
| disassemble(Obj); | ||||
|
|
||||
|
|
@@ -604,13 +606,13 @@ bool ProfiledBinary::dissassembleSymbol(std::size_t SI, ArrayRef<uint8_t> Bytes, | |||
| // Record potential call targets for tail frame inference later-on. | ||||
| if (InferMissingFrames && FRange) { | ||||
| uint64_t Target = 0; | ||||
| MIA->evaluateBranch(Inst, Address, Size, Target); | ||||
| bool Err = MIA->evaluateBranch(Inst, Address, Size, Target); | ||||
| if (MCDesc.isCall()) { | ||||
| // Indirect call targets are unknown at this point. Recording the | ||||
| // unknown target (zero) for further LBR-based refinement. | ||||
| MissingContextInferrer->CallEdges[Address].insert(Target); | ||||
| } else if (MCDesc.isUnconditionalBranch()) { | ||||
| assert(Target && | ||||
| assert(Err && | ||||
| "target should be known for unconditional direct branch"); | ||||
| // Any inter-function unconditional jump is considered tail call at | ||||
| // this point. This is not 100% accurate and could further be | ||||
|
|
@@ -820,6 +822,48 @@ void ProfiledBinary::populateSymbolAddressList(const ObjectFile *Obj) { | |||
| } | ||||
| } | ||||
|
|
||||
| void ProfiledBinary::populateSymbolsFromBinary(const ObjectFile *Obj) { | ||||
| // Load binary functions from symbol table when Debug info is incomplete | ||||
| StringRef FileName = Obj->getFileName(); | ||||
| for (const SymbolRef &Symbol : Obj->symbols()) { | ||||
| const SymbolRef::Type Type = unwrapOrError(Symbol.getType(), FileName); | ||||
| const uint64_t Addr = unwrapOrError(Symbol.getAddress(), FileName); | ||||
|
||||
| const StringRef Name = unwrapOrError(Symbol.getName(), FileName); | ||||
| const uint64_t Size = Symbol.getSize(); | ||||
|
|
||||
| if (Size == 0 || Type != SymbolRef::ST_Function) | ||||
| continue; | ||||
|
|
||||
| const char *Suffixes[] = {".destroy", ".resume", ".llvm.", | ||||
| ".cold", ".warm", nullptr}; | ||||
| const StringRef SymName = | ||||
| FunctionSamples::getCanonicalFnName(Name, Suffixes); | ||||
|
|
||||
| auto Ret = BinaryFunctions.emplace(SymName, BinaryFunction()); | ||||
| auto &Func = Ret.first->second; | ||||
| if (Ret.second) | ||||
| Func.FuncName = Ret.first->first; | ||||
|
|
||||
| if (auto Range = findFuncRange(Addr)) { | ||||
| if (Ret.second && ShowDetailedWarning) | ||||
| WithColor::warning() | ||||
|
There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. Will this result in a large number of warnings since most of the symbols can be obtained from DWARF? There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. I think the
There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. Symbol table finding entry already existing in dwarf should not be a warning. When that happens, we should check if they both point to same ranges, and only warn when
There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. Updated this part and the warning shall now capture case 1. And I've merged (2) into the |
||||
| << "Symbol " << Name << " start address " | ||||
| << format("%8" PRIx64, Addr) << " already exists in DWARF at " | ||||
| << format("%8" PRIx64, Range->StartAddress) << " in function " | ||||
| << Range->getFuncName() << "\n"; | ||||
| } else { | ||||
| // Store/Update Function Range from SymTab | ||||
| Func.Ranges.emplace_back(Addr, Addr + Size); | ||||
|
|
||||
| auto R = StartAddrToFuncRangeMap.emplace(Addr, FuncRange()); | ||||
| FuncRange &FRange = R.first->second; | ||||
| FRange.Func = &Func; | ||||
| FRange.StartAddress = Addr; | ||||
| FRange.EndAddress = Addr + Size; | ||||
| } | ||||
| } | ||||
| } | ||||
|
|
||||
| void ProfiledBinary::loadSymbolsFromDWARFUnit(DWARFUnit &CompilationUnit) { | ||||
| for (const auto &DieInfo : CompilationUnit.dies()) { | ||||
| llvm::DWARFDie Die(&CompilationUnit, &DieInfo); | ||||
|
|
||||
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
For COFF, getCommonSymbolSizeImpl returns the symbol value, which is the start address offset of this function.
llvm-project/llvm/lib/Object/COFFObjectFile.cpp
Lines 240 to 243 in 7a54353
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
Ahh, thanks for checking on this! Is this behavior intended for COFF? Because there's another
`SymbolRef::getCommonSymbolSize` that does exactly the same thing but with an assertion check: llvm-project/llvm/include/llvm/Object/ObjectFile.h
Line 314 in d4713ec
Would it be better to fix the
`SymbolRef` or the COFF `getCommonSymbolSizeImpl` to either get the actual symbol size or return 0?
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
getCommonSymbolSize is used for common symbol.
You can use -fcommon to explicitly generate them. e.g. for a simple global variable
`int cval;` For ELF, there's a
`size` field. `size` and `Value` of a common symbol are both its size. For COFF, there's no
`size` field; the symbol's value is its size. I think COFF's implementation is correct. Regarding a function symbol's size:
For ELF, it is the function's size. I think you can use ELFSymbolRef::getSize() safely.
For COFF, it does not support it currently. I think we should use value 0.
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
I think size for COFF can be supported by PE format, although it has not been supported yet.
https://learn.microsoft.com/en-us/windows/win32/debug/pe-format#auxiliary-format-2-bf-and-ef-symbols
Maybe we can file another PR to support it.
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
Gotcha! For now I've added another virtual function
`ObjectFile::getSymbolSizeImpl` that returns 0 by default. The ELFObjectFile overrides this and returns `getSymbolSize`.
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
I don't think we should add a new virtual function in ObjectFile, since the symbol size is only valid for part of the symbol table. That's why there's a getSize method in ELFSymbolRef.
BTW, ObjectFile::getSymbolSizeImpl should return the size for a common symbol, just like getCommonSize.
How about defining a function/lambda to get the function symbol's size in ProfiledBinary::populateSymbolsFromBinary, to avoid modifying the ObjectFile interface?
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
makes sense. let me change it to ELF-specific then. I did something like this: