diff --git a/llvm/include/llvm/ProfileData/SampleProf.h b/llvm/include/llvm/ProfileData/SampleProf.h index 3dd34aba2d716..dd0495f29e8c3 100644 --- a/llvm/include/llvm/ProfileData/SampleProf.h +++ b/llvm/include/llvm/ProfileData/SampleProf.h @@ -1214,13 +1214,19 @@ class FunctionSamples { // Note the sequence of the suffixes in the knownSuffixes array matters. // If suffix "A" is appended after the suffix "B", "A" should be in front // of "B" in knownSuffixes. - const char *KnownSuffixes[] = {LLVMSuffix, PartSuffix, UniqSuffix}; + const SmallVector<StringRef> KnownSuffixes{LLVMSuffix, PartSuffix, + UniqSuffix}; + return getCanonicalFnName(FnName, KnownSuffixes, Attr); + } + + static StringRef getCanonicalFnName(StringRef FnName, + ArrayRef<StringRef> Suffixes, + StringRef Attr = "selected") { if (Attr == "" || Attr == "all") return FnName.split('.').first; if (Attr == "selected") { StringRef Cand(FnName); - for (const auto &Suf : KnownSuffixes) { - StringRef Suffix(Suf); + for (const auto Suffix : Suffixes) { // If the profile contains ".__uniq." suffix, don't strip the // suffix for names in the IR. 
if (Suffix == UniqSuffix && FunctionSamples::HasUniqSuffix) diff --git a/llvm/test/tools/llvm-profgen/Inputs/missing-dwarf.exe b/llvm/test/tools/llvm-profgen/Inputs/missing-dwarf.exe new file mode 100755 index 0000000000000..c4b8af0bf1f2a Binary files /dev/null and b/llvm/test/tools/llvm-profgen/Inputs/missing-dwarf.exe differ diff --git a/llvm/test/tools/llvm-profgen/missing-dwarf.test b/llvm/test/tools/llvm-profgen/missing-dwarf.test new file mode 100644 index 0000000000000..b96ae9018dae1 --- /dev/null +++ b/llvm/test/tools/llvm-profgen/missing-dwarf.test @@ -0,0 +1,37 @@ +; RUN: rm -rf %t +; RUN: mkdir -p %t +; RUN: cd %t + +; RUN: echo -e "1\n401120-40113b:1\n1\n40112f->401110:1" > %t.prof + +; Test --load-function-from-symbol=0 +; RUN: llvm-profgen --format=text --unsymbolized-profile=%t.prof --binary=%S/Inputs/missing-dwarf.exe --output=%t1 --fill-zero-for-all-funcs --show-detailed-warning --use-offset=0 --load-function-from-symbol=0 2>&1 | FileCheck %s --check-prefix=CHECK-NO-LOAD-SYMTAB + +; CHECK-NO-LOAD-SYMTAB: warning: Loading of DWARF info completed, but no binary functions have been retrieved. + +; Test --load-function-from-symbol=1 +; RUN: llvm-profgen --format=text --unsymbolized-profile=%t.prof --binary=%S/Inputs/missing-dwarf.exe --output=%t2 --fill-zero-for-all-funcs --show-detailed-warning --use-offset=0 --load-function-from-symbol=1 +; RUN: FileCheck %s --input-file %t2 --check-prefix=CHECK-LOAD-SYMTAB + +; CHECK-LOAD-SYMTAB: main:2:1 +; CHECK-LOAD-SYMTAB-NEXT: 1: 1 +; CHECK-LOAD-SYMTAB-NEXT: 2: 1 foo:1 +; CHECK-LOAD-SYMTAB-NEXT: !CFGChecksum: 281479271677951 +; CHECK-LOAD-SYMTAB-NEXT: foo:0:0 +; CHECK-LOAD-SYMTAB-NEXT: 1: 0 +; CHECK-LOAD-SYMTAB-NEXT: !CFGChecksum: 4294967295 + +; Build instructions: +; missing-dwarf.o: clang -gsplit-dwarf=split -fdebug-compilation-dir=. test.c -fdebug-info-for-profiling -fpseudo-probe-for-profiling -O0 -g -o missing-dwarf.o -c +; missing-dwarf.exe: clang -fdebug-compilation-dir=. 
missing-dwarf.o -o missing-dwarf.exe -fdebug-info-for-profiling -fpseudo-probe-for-profiling -O0 -g + +; Source code: + +int foo() { + return 1; +} + +int main() { + foo(); + return 0; +} diff --git a/llvm/tools/llvm-profgen/PerfReader.cpp b/llvm/tools/llvm-profgen/PerfReader.cpp index 183b248a72320..1c4b79554e1a5 100644 --- a/llvm/tools/llvm-profgen/PerfReader.cpp +++ b/llvm/tools/llvm-profgen/PerfReader.cpp @@ -1284,6 +1284,7 @@ void PerfScriptReader::warnInvalidRange() { uint64_t TotalRangeNum = 0; uint64_t InstNotBoundary = 0; uint64_t UnmatchedRange = 0; + uint64_t RecoveredRange = 0; uint64_t RangeCrossFunc = 0; uint64_t BogusRange = 0; @@ -1309,6 +1310,9 @@ void PerfScriptReader::warnInvalidRange() { continue; } + if (FRange->Func->FromSymtab) + RecoveredRange += I.second; + if (EndAddress >= FRange->EndAddress) { RangeCrossFunc += I.second; WarnInvalidRange(StartAddress, EndAddress, RangeCrossFuncMsg); @@ -1328,6 +1332,9 @@ void PerfScriptReader::warnInvalidRange() { emitWarningSummary( UnmatchedRange, TotalRangeNum, "of samples are from ranges that do not belong to any functions."); + emitWarningSummary( + RecoveredRange, TotalRangeNum, + "of samples are from ranges that belong to functions recovered from symbol table."); emitWarningSummary( RangeCrossFunc, TotalRangeNum, "of samples are from ranges that do cross function boundaries."); diff --git a/llvm/tools/llvm-profgen/ProfiledBinary.cpp b/llvm/tools/llvm-profgen/ProfiledBinary.cpp index 94728ce4abffe..e65bafa9ac4ca 100644 --- a/llvm/tools/llvm-profgen/ProfiledBinary.cpp +++ b/llvm/tools/llvm-profgen/ProfiledBinary.cpp @@ -65,6 +65,13 @@ static cl::list DisassembleFunctions( "names only. Only work with show-disassembly-only"), cl::cat(ProfGenCategory)); +static cl::opt<bool> + LoadFunctionFromSymbol("load-function-from-symbol", cl::init(true), + cl::desc("Gather additional binary function info " + "from symbols (e.g. 
.symtab) in case " + "dwarf info is incomplete."), + cl::cat(ProfGenCategory)); + static cl::opt KernelBinary("kernel", cl::desc("Generate the profile for Linux kernel binary."), @@ -257,6 +264,9 @@ void ProfiledBinary::load() { if (ShowDisassemblyOnly) decodePseudoProbe(Obj); + if (LoadFunctionFromSymbol && UsePseudoProbes) + populateSymbolsFromBinary(Obj); + // Disassemble the text sections. disassemble(Obj); @@ -604,13 +614,13 @@ bool ProfiledBinary::dissassembleSymbol(std::size_t SI, ArrayRef Bytes, // Record potential call targets for tail frame inference later-on. if (InferMissingFrames && FRange) { uint64_t Target = 0; - MIA->evaluateBranch(Inst, Address, Size, Target); + [[maybe_unused]] bool Resolved = MIA->evaluateBranch(Inst, Address, Size, Target); if (MCDesc.isCall()) { // Indirect call targets are unknown at this point. Recording the // unknown target (zero) for further LBR-based refinement. MissingContextInferrer->CallEdges[Address].insert(Target); } else if (MCDesc.isUnconditionalBranch()) { - assert(Target && + assert(Resolved && "target should be known for unconditional direct branch"); // Any inter-function unconditional jump is considered tail call at // this point. This is not 100% accurate and could further be @@ -820,6 +830,63 @@ void ProfiledBinary::populateSymbolAddressList(const ObjectFile *Obj) { } } +void ProfiledBinary::populateSymbolsFromBinary(const ObjectFile *Obj) { + // Load binary functions from symbol table when Debug info is incomplete. + // Strip the internal suffixes which are not reflected in the DWARF info. 
+ const SmallVector<StringRef> Suffixes( + { + // Internal suffixes from CoroSplit pass + ".cleanup", ".destroy", ".resume", + // Internal suffixes from Bolt + ".cold", ".warm", + // Compiler internal + ".llvm.", + }); + StringRef FileName = Obj->getFileName(); + for (const SymbolRef &Symbol : Obj->symbols()) { + const SymbolRef::Type Type = unwrapOrError(Symbol.getType(), FileName); + const uint64_t StartAddr = unwrapOrError(Symbol.getAddress(), FileName); + const StringRef Name = unwrapOrError(Symbol.getName(), FileName); + uint64_t Size = 0; + if (isa<ELFObjectFileBase>(Symbol.getObject())) { + ELFSymbolRef ElfSymbol(Symbol); + Size = ElfSymbol.getSize(); + } + + if (Size == 0 || Type != SymbolRef::ST_Function) + continue; + + const StringRef SymName = + FunctionSamples::getCanonicalFnName(Name, Suffixes); + + auto Ret = BinaryFunctions.emplace(SymName, BinaryFunction()); + auto &Func = Ret.first->second; + if (Ret.second) { + Func.FuncName = Ret.first->first; + Func.FromSymtab = true; + HashBinaryFunctions[MD5Hash(StringRef(SymName))] = &Func; + } + + if (auto Range = findFuncRange(StartAddr)) { + if (Ret.second && Range->getFuncName() != SymName && ShowDetailedWarning) + WithColor::warning() + << "Conflicting symbol " << Name << " already exists in DWARF as " + << Range->getFuncName() << " at address " << format("%8" PRIx64, StartAddr) + << ". 
The DWARF indicates a range from " << format("%8" PRIx64, Range->StartAddress) << " to " + << format("%8" PRIx64, Range->EndAddress) << "\n"; + } else { + // Store/Update Function Range from SymTab + Func.Ranges.emplace_back(StartAddr, StartAddr + Size); + + auto R = StartAddrToFuncRangeMap.emplace(StartAddr, FuncRange()); + FuncRange &FRange = R.first->second; + FRange.Func = &Func; + FRange.StartAddress = StartAddr; + FRange.EndAddress = StartAddr + Size; + } + } +} + void ProfiledBinary::loadSymbolsFromDWARFUnit(DWARFUnit &CompilationUnit) { for (const auto &DieInfo : CompilationUnit.dies()) { llvm::DWARFDie Die(&CompilationUnit, &DieInfo); @@ -844,8 +911,10 @@ void ProfiledBinary::loadSymbolsFromDWARFUnit(DWARFUnit &CompilationUnit) { // BinaryFunction indexed by the name. auto Ret = BinaryFunctions.emplace(Name, BinaryFunction()); auto &Func = Ret.first->second; - if (Ret.second) + if (Ret.second) { Func.FuncName = Ret.first->first; + Func.FromSymtab = false; + } for (const auto &Range : Ranges) { uint64_t StartAddress = Range.LowPC; diff --git a/llvm/tools/llvm-profgen/ProfiledBinary.h b/llvm/tools/llvm-profgen/ProfiledBinary.h index 5a814b7dbd52d..753fcc935716b 100644 --- a/llvm/tools/llvm-profgen/ProfiledBinary.h +++ b/llvm/tools/llvm-profgen/ProfiledBinary.h @@ -76,6 +76,7 @@ struct BinaryFunction { StringRef FuncName; // End of range is an exclusive bound. RangesTy Ranges; + bool FromSymtab = false; uint64_t getFuncSize() { uint64_t Sum = 0; @@ -356,6 +357,9 @@ class ProfiledBinary { // Create symbol to its start address mapping. void populateSymbolAddressList(const object::ObjectFile *O); + // Load functions from its symbol table (when DWARF info is missing). + void populateSymbolsFromBinary(const object::ObjectFile *O); + // A function may be spilt into multiple non-continuous address ranges. We use // this to set whether start a function range is the real entry of the // function and also set false to the non-function label.