- 
                Notifications
    You must be signed in to change notification settings 
- Fork 15k
[llvm-profgen] Loading binary functions from .symtab when DWARF info is incomplete #163654
New issue
Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.
By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.
Already on GitHub? Sign in to your account
base: main
Are you sure you want to change the base?
Changes from 10 commits
f34ab2d
              a1f32b5
              0fd352d
              c097d37
              a19064d
              e12e694
              5eead6b
              8f59bfa
              a967994
              0dc2c66
              75dc996
              5600e83
              File filter
Filter by extension
Conversations
Jump to
Diff view
Diff view
There are no files selected for viewing
| Original file line number | Diff line number | Diff line change | 
|---|---|---|
| @@ -0,0 +1,40 @@ | ||
| ; RUN: rm -rf %t | ||
| ; RUN: mkdir -p %t | ||
| ; RUN: cd %t | ||
|  | ||
| ; RUN: echo -e "1\n401120-40113b:1\n1\n40112f->401110:1" > %t.prof | ||
| ; RUN: cp %S/Inputs/missing-dwarf.exe %t/missing-dwarf.exe | ||
|          | ||
|  | ||
| ; Test --load-function-from-symbol=0 | ||
| ; RUN: llvm-profgen --format=text --unsymbolized-profile=%t.prof --binary=%t/missing-dwarf.exe --output=%t1 --fill-zero-for-all-funcs --show-detailed-warning --use-offset=0 --load-function-from-symbol=0 2>&1 | FileCheck %s --check-prefix=CHECK-NO-LOAD-SYMTAB | ||
|  | ||
| ; CHECK-NO-LOAD-SYMTAB: warning: 100.00%(1/1) of function range samples do not belong to any function | ||
| ; CHECK-NO-LOAD-SYMTAB-NEXT: warning: 100.00%(1/1) of LBR source samples do not belong to any function | ||
| ; CHECK-NO-LOAD-SYMTAB-NEXT: warning: 100.00%(1/1) of LBR target samples do not belong to any function | ||
|  | ||
| ; Test --load-function-from-symbol=1 | ||
| ; RUN: llvm-profgen --format=text --unsymbolized-profile=%t.prof --binary=%t/missing-dwarf.exe --output=%t2 --fill-zero-for-all-funcs --show-detailed-warning --use-offset=0 --load-function-from-symbol=1 | ||
| ; RUN: FileCheck %s --input-file %t2 --check-prefix=CHECK-LOAD-SYMTAB | ||
|  | ||
| ; CHECK-LOAD-SYMTAB: main:2:1 | ||
| ; CHECK-LOAD-SYMTAB-NEXT: 1: 1 | ||
| ; CHECK-LOAD-SYMTAB-NEXT: 2: 1 foo:1 | ||
| ; CHECK-LOAD-SYMTAB-NEXT: !CFGChecksum: 281479271677951 | ||
| ; CHECK-LOAD-SYMTAB-NEXT: foo:0:0 | ||
| ; CHECK-LOAD-SYMTAB-NEXT: 1: 0 | ||
| ; CHECK-LOAD-SYMTAB-NEXT: !CFGChecksum: 4294967295 | ||
|  | ||
| ; Build instructions: | ||
| ; missing-dwarf.o: clang -gsplit-dwarf=split -fdebug-compilation-dir=. test.c -fdebug-info-for-profiling -fpseudo-probe-for-profiling -O0 -g -o missing-dwarf.o -c | ||
| ; missing-dwarf.exe: clang -fdebug-compilation-dir=. missing-dwarf.o -o missing-dwarf.exe -fdebug-info-for-profiling -fpseudo-probe-for-profiling -O0 -g | ||
|  | ||
| ; Source code: | ||
|  | ||
| int foo() { | ||
| return 1; | ||
| } | ||
|  | ||
| int main() { | ||
| foo(); | ||
| return 0; | ||
| } | ||
| Original file line number | Diff line number | Diff line change | 
|---|---|---|
|  | @@ -449,29 +449,62 @@ bool ProfileGeneratorBase::collectFunctionsFromRawProfile( | |
| // Go through all the stacks, ranges and branches in sample counters, use | ||
| // the start of the range to look up the function it belongs and record the | ||
| // function. | ||
| uint64_t ErrStkAddr = 0, ErrFuncRange = 0, ErrSrc = 0, ErrTgt = 0; | ||
| uint64_t TotalStkAddr = 0, TotalFuncRange = 0, TotalSrc = 0, TotalTgt = 0; | ||
| for (const auto &CI : *SampleCounters) { | ||
| if (const auto *CtxKey = dyn_cast<AddrBasedCtxKey>(CI.first.getPtr())) { | ||
| for (auto StackAddr : CtxKey->Context) { | ||
| uint64_t Inc = Binary->addressIsCode(StackAddr) ? 1 : 0; | ||
| TotalStkAddr += Inc; | ||
|          | ||
| if (FuncRange *FRange = Binary->findFuncRange(StackAddr)) | ||
| ProfiledFunctions.insert(FRange->Func); | ||
| else | ||
| ErrStkAddr += Inc; | ||
| } | ||
| } | ||
|  | ||
| for (auto Item : CI.second.RangeCounter) { | ||
| uint64_t StartAddress = Item.first.first; | ||
| uint64_t Inc = Binary->addressIsCode(StartAddress) ? Item.second : 0; | ||
| TotalFuncRange += Inc; | ||
| if (FuncRange *FRange = Binary->findFuncRange(StartAddress)) | ||
| ProfiledFunctions.insert(FRange->Func); | ||
| else | ||
| ErrFuncRange += Inc; | ||
| } | ||
|  | ||
| for (auto Item : CI.second.BranchCounter) { | ||
| uint64_t SourceAddress = Item.first.first; | ||
| uint64_t TargetAddress = Item.first.second; | ||
| uint64_t SrcInc = Binary->addressIsCode(SourceAddress) ? Item.second : 0; | ||
| uint64_t TgtInc = Binary->addressIsCode(TargetAddress) ? Item.second : 0; | ||
| TotalSrc += SrcInc; | ||
| if (FuncRange *FRange = Binary->findFuncRange(SourceAddress)) | ||
| ProfiledFunctions.insert(FRange->Func); | ||
| else | ||
| ErrSrc += SrcInc; | ||
| TotalTgt += TgtInc; | ||
| if (FuncRange *FRange = Binary->findFuncRange(TargetAddress)) | ||
| ProfiledFunctions.insert(FRange->Func); | ||
| else | ||
| ErrTgt += TgtInc; | ||
| } | ||
| } | ||
|  | ||
| if (ErrStkAddr) | ||
|          | ||
| emitWarningSummary( | ||
| ErrStkAddr, TotalStkAddr, | ||
| "of stack address samples do not belong to any function"); | ||
| if (ErrFuncRange) | ||
| emitWarningSummary( | ||
| ErrFuncRange, TotalFuncRange, | ||
| "of function range samples do not belong to any function"); | ||
|          | ||
| if (ErrSrc) | ||
| emitWarningSummary(ErrSrc, TotalSrc, | ||
| "of LBR source samples do not belong to any function"); | ||
| if (ErrTgt) | ||
| emitWarningSummary(ErrTgt, TotalTgt, | ||
| "of LBR target samples do not belong to any function"); | ||
|          | ||
| return true; | ||
| } | ||
|  | ||
|  | ||
| Original file line number | Diff line number | Diff line change | ||||||||||
|---|---|---|---|---|---|---|---|---|---|---|---|---|
|  | @@ -65,6 +65,13 @@ static cl::list<std::string> DisassembleFunctions( | |||||||||||
| "names only. Only work with show-disassembly-only"), | ||||||||||||
| cl::cat(ProfGenCategory)); | ||||||||||||
|  | ||||||||||||
| static cl::opt<bool> | ||||||||||||
| LoadFunctionFromSymbol("load-function-from-symbol", cl::init(true), | ||||||||||||
| There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. @wlei-llvm somehow i remember we had something like that before (long time ago)? did we remove the symbol table loading code in the past? There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. Yeah, iirc, we initially only loaded the function ranges from symbol table, then after we found the issue with the multi-version function(foo.cold, foo.resume...), we redesigned and fully switched to load the symbol from dwarf. At that time,  the code was implemented inside the  There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. For reference, this is the patch https://reviews.llvm.org/D112282 we did the switch to use dwarf symbol. | ||||||||||||
| cl::desc("Gather additional binary function info " | ||||||||||||
| "from symbols (e.g. .symtab) in case " | ||||||||||||
| "dwarf info is incomplete."), | ||||||||||||
| cl::cat(ProfGenCategory)); | ||||||||||||
|  | ||||||||||||
| static cl::opt<bool> | ||||||||||||
| KernelBinary("kernel", | ||||||||||||
| cl::desc("Generate the profile for Linux kernel binary."), | ||||||||||||
|  | @@ -257,6 +264,9 @@ void ProfiledBinary::load() { | |||||||||||
| if (ShowDisassemblyOnly) | ||||||||||||
| decodePseudoProbe(Obj); | ||||||||||||
|  | ||||||||||||
| if (LoadFunctionFromSymbol && UsePseudoProbes) | ||||||||||||
| There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. Why is pseudo-probe required here? There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. Ohh, this is to be compatible with the code here: llvm-project/llvm/tools/llvm-profgen/ProfileGenerator.cpp Lines 510 to 514 in af110e1 
 UsePseudoProbesis needed so we won't use line numbers to generate profile, which won't work with symtab because symtab only have addresses | ||||||||||||
| populateSymbolsFromBinary(Obj); | ||||||||||||
|  | ||||||||||||
| // Disassemble the text sections. | ||||||||||||
| disassemble(Obj); | ||||||||||||
|  | ||||||||||||
|  | @@ -604,13 +614,13 @@ bool ProfiledBinary::dissassembleSymbol(std::size_t SI, ArrayRef<uint8_t> Bytes, | |||||||||||
| // Record potential call targets for tail frame inference later-on. | ||||||||||||
| if (InferMissingFrames && FRange) { | ||||||||||||
| uint64_t Target = 0; | ||||||||||||
| MIA->evaluateBranch(Inst, Address, Size, Target); | ||||||||||||
| bool Err = MIA->evaluateBranch(Inst, Address, Size, Target); | ||||||||||||
| if (MCDesc.isCall()) { | ||||||||||||
| // Indirect call targets are unknown at this point. Recording the | ||||||||||||
| // unknown target (zero) for further LBR-based refinement. | ||||||||||||
| MissingContextInferrer->CallEdges[Address].insert(Target); | ||||||||||||
| } else if (MCDesc.isUnconditionalBranch()) { | ||||||||||||
| assert(Target && | ||||||||||||
| assert(Err && | ||||||||||||
| "target should be known for unconditional direct branch"); | ||||||||||||
| // Any inter-function unconditional jump is considered tail call at | ||||||||||||
| // this point. This is not 100% accurate and could further be | ||||||||||||
|  | @@ -820,6 +830,54 @@ void ProfiledBinary::populateSymbolAddressList(const ObjectFile *Obj) { | |||||||||||
| } | ||||||||||||
| } | ||||||||||||
|  | ||||||||||||
| void ProfiledBinary::populateSymbolsFromBinary(const ObjectFile *Obj) { | ||||||||||||
| // Load binary functions from symbol table when Debug info is incomplete | ||||||||||||
| const char *Suffixes[] = {".destroy", ".resume", ".llvm.", | ||||||||||||
| ".cold", ".warm", nullptr}; | ||||||||||||
| StringRef FileName = Obj->getFileName(); | ||||||||||||
| for (const SymbolRef &Symbol : Obj->symbols()) { | ||||||||||||
| const SymbolRef::Type Type = unwrapOrError(Symbol.getType(), FileName); | ||||||||||||
| const uint64_t Addr = unwrapOrError(Symbol.getAddress(), FileName); | ||||||||||||
|          | ||||||||||||
| const StringRef Name = unwrapOrError(Symbol.getName(), FileName); | ||||||||||||
| uint64_t Size = 0; | ||||||||||||
| if (isa<ELFObjectFileBase>(Symbol.getObject())) { | ||||||||||||
| ELFSymbolRef ElfSymbol(Symbol); | ||||||||||||
| Size = ElfSymbol.getSize(); | ||||||||||||
| } | ||||||||||||
|  | ||||||||||||
| if (Size == 0 || Type != SymbolRef::ST_Function) | ||||||||||||
| continue; | ||||||||||||
|  | ||||||||||||
| const StringRef SymName = | ||||||||||||
| FunctionSamples::getCanonicalFnName(Name, Suffixes); | ||||||||||||
|  | ||||||||||||
| auto Ret = BinaryFunctions.emplace(SymName, BinaryFunction()); | ||||||||||||
| auto &Func = Ret.first->second; | ||||||||||||
| if (Ret.second) { | ||||||||||||
| Func.FuncName = Ret.first->first; | ||||||||||||
| HashBinaryFunctions[MD5Hash(StringRef(SymName))] = &Func; | ||||||||||||
| } | ||||||||||||
|  | ||||||||||||
| if (auto Range = findFuncRange(Addr)) { | ||||||||||||
| if (Ret.second && ShowDetailedWarning) | ||||||||||||
| WithColor::warning() | ||||||||||||
| There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. Will this result in a large number of warnings since most of the symbols can be obtained from DWARF? There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. I think the  
 There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. Symbol table finding entry already existing in dwarf should not be a warning. When that happens, we should check if they both point to same ranges, and only warn when 
 There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. Updated this part and the warning shall now capture case 1.  And I've merged (2) into the  | ||||||||||||
| << "Symbol " << Name << " start address " | ||||||||||||
| << format("%8" PRIx64, Addr) << " already exists in DWARF at " | ||||||||||||
| << format("%8" PRIx64, Range->StartAddress) << " in function " | ||||||||||||
| << Range->getFuncName() << "\n"; | ||||||||||||
| } else { | ||||||||||||
| // Store/Update Function Range from SymTab | ||||||||||||
| Func.Ranges.emplace_back(Addr, Addr + Size); | ||||||||||||
|  | ||||||||||||
| auto R = StartAddrToFuncRangeMap.emplace(Addr, FuncRange()); | ||||||||||||
| FuncRange &FRange = R.first->second; | ||||||||||||
| FRange.Func = &Func; | ||||||||||||
| FRange.StartAddress = Addr; | ||||||||||||
| FRange.EndAddress = Addr + Size; | ||||||||||||
| } | ||||||||||||
| } | ||||||||||||
| } | ||||||||||||
|  | ||||||||||||
| void ProfiledBinary::loadSymbolsFromDWARFUnit(DWARFUnit &CompilationUnit) { | ||||||||||||
| for (const auto &DieInfo : CompilationUnit.dies()) { | ||||||||||||
| llvm::DWARFDie Die(&CompilationUnit, &DieInfo); | ||||||||||||
|  | ||||||||||||
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
NIT:
llvm-project/llvm/include/llvm/ADT/ArrayRef.h
Line 128 in 6a0f392