@@ -37,6 +37,14 @@ cl::opt<bool> ShowSourceLocations("show-source-locations",
3737 cl::desc (" Print source locations." ),
3838 cl::cat(ProfGenCategory));
3939
// Boolean command-line option, enabled by default and registered under
// ProfGenCategory. ProfiledBinary::load() checks it together with
// UsePseudoProbes before calling loadSymbolsFromSymtab().
// NOTE(review): unlike the neighbouring options this one is not declared
// `static` -- presumably it is referenced from another translation unit;
// confirm.
40+ cl::opt<bool > LoadFunctionFromSymbol (
41+ " load-function-from-symbol" , cl::init(true ),
42+ cl::desc(
43+ " Gather additional binary function info from symbols (e.g. .symtab) in "
44+ " case dwarf info is incomplete. Only support binaries in ELF format "
45+ " with pseudo probe, for other formats, this flag will be a no-op." ),
46+ cl::cat(ProfGenCategory));
47+
4048static cl::opt<bool >
4149 ShowCanonicalFnName (" show-canonical-fname" ,
4250 cl::desc (" Print canonical function name." ),
@@ -257,6 +265,9 @@ void ProfiledBinary::load() {
257265 if (ShowDisassemblyOnly)
258266 decodePseudoProbe (Obj);
259267
268+ if (LoadFunctionFromSymbol && UsePseudoProbes)
269+ loadSymbolsFromSymtab (Obj);
270+
260271 // Disassemble the text sections.
261272 disassemble (Obj);
262273
@@ -461,6 +472,13 @@ void ProfiledBinary::decodePseudoProbe(const ObjectFile *Obj) {
461472 } else {
462473 for (auto *F : ProfiledFunctions) {
463474 GuidFilter.insert (Function::getGUIDAssumingExternalLinkage (F->FuncName ));
475+ // DWARF name might be broken when a DWARF32 .debug_str.dwo section
476+ // exceeds 4GB. We expect the symbol table to contain the correct function
477+ // names, which match the pseudo probes. Add back all the GUIDs if
478+ // possible.
479+ auto AltGUIDs = AlternativeFunctionGUIDs.equal_range (F);
480+ for (const auto &[_, Func] : make_range (AltGUIDs))
481+ GuidFilter.insert (Func);
464482 for (auto &Range : F->Ranges ) {
465483 auto GUIDs = StartAddrToSymMap.equal_range (Range.first );
466484 for (const auto &[StartAddr, Func] : make_range (GUIDs))
@@ -522,7 +540,9 @@ void ProfiledBinary::setIsFuncEntry(FuncRange *FuncRange,
522540 // Set IsFuncEntry to true if there is only one range in the function or the
523541 // RangeSymName from ELF is equal to its DWARF-based function name.
524542 if (FuncRange->Func ->Ranges .size () == 1 ||
525- (!FuncRange->IsFuncEntry && FuncRange->getFuncName () == RangeSymName))
543+ (!FuncRange->IsFuncEntry &&
544+ (FuncRange->getFuncName () == RangeSymName ||
545+ FuncRange->Func ->NameStatus != DwarfNameStatus::Matched)))
526546 FuncRange->IsFuncEntry = true ;
527547}
528548
@@ -604,13 +624,13 @@ bool ProfiledBinary::dissassembleSymbol(std::size_t SI, ArrayRef<uint8_t> Bytes,
604624 // Record potential call targets for tail frame inference later-on.
605625 if (InferMissingFrames && FRange) {
606626 uint64_t Target = 0 ;
607- MIA->evaluateBranch (Inst, Address, Size, Target);
627+ bool Err = MIA->evaluateBranch (Inst, Address, Size, Target);
608628 if (MCDesc.isCall ()) {
609629 // Indirect call targets are unknown at this point. Recording the
610630 // unknown target (zero) for further LBR-based refinement.
611631 MissingContextInferrer->CallEdges [Address].insert (Target);
612632 } else if (MCDesc.isUnconditionalBranch ()) {
613- assert (Target &&
633+ assert (Err &&
614634 " target should be known for unconditional direct branch" );
615635 // Any inter-function unconditional jump is considered tail call at
616636 // this point. This is not 100% accurate and could further be
@@ -820,6 +840,100 @@ void ProfiledBinary::populateSymbolAddressList(const ObjectFile *Obj) {
820840 }
821841}
822842
843+ void ProfiledBinary::loadSymbolsFromSymtab (const ObjectFile *Obj) {
844+ // Load binary functions from symbol table when Debug info is incomplete.
845+ // Strip the internal suffixes which are not reflected in the DWARF info.
846+ const SmallVector<StringRef, 10 > Suffixes (
847+ {// Internal suffixes from CoroSplit pass
848+ " .cleanup" , " .destroy" , " .resume" ,
849+ // Internal suffixes from Bolt
850+ " .cold" , " .warm" ,
851+ // Compiler/LTO internal
852+ " .llvm." , " .part." , " .isra." , " .constprop." , " .lto_priv." });
853+ StringRef FileName = Obj->getFileName ();
854+ // Only apply this to ELF binary. e.g. COFF file format doesn't have `size`
855+ // field in the symbol table.
856+ bool IsELFObject = isa<ELFObjectFileBase>(Obj);
857+ if (!IsELFObject)
858+ return ;
859+ for (const SymbolRef &Symbol : Obj->symbols ()) {
860+ const SymbolRef::Type Type = unwrapOrError (Symbol.getType (), FileName);
861+ const uint64_t StartAddr = unwrapOrError (Symbol.getAddress (), FileName);
862+ const StringRef Name = unwrapOrError (Symbol.getName (), FileName);
863+ uint64_t Size = 0 ;
864+ if (LLVM_LIKELY (IsELFObject)) {
865+ ELFSymbolRef ElfSymbol (Symbol);
866+ Size = ElfSymbol.getSize ();
867+ }
868+
869+ if (Size == 0 || Type != SymbolRef::ST_Function)
870+ continue ;
871+
872+ const uint64_t EndAddr = StartAddr + Size;
873+ const StringRef SymName =
874+ FunctionSamples::getCanonicalFnName (Name, Suffixes);
875+ assert (StartAddr < EndAddr && StartAddr >= getPreferredBaseAddress () &&
876+ " Function range is invalid." );
877+
878+ auto Range = findFuncRange (StartAddr);
879+ if (!Range) {
880+ assert (findFuncRange (EndAddr - 1 ) == nullptr &&
881+ " Function range overlaps with existing functions." );
882+ // Function from symbol table not found previously in DWARF, store ranges.
883+ auto Ret = BinaryFunctions.emplace (SymName, BinaryFunction ());
884+ auto &Func = Ret.first ->second ;
885+ if (Ret.second ) {
886+ Func.FuncName = Ret.first ->first ;
887+ HashBinaryFunctions[Function::getGUIDAssumingExternalLinkage (SymName)] =
888+ &Func;
889+ }
890+
891+ Func.NameStatus = DwarfNameStatus::Missing;
892+ Func.Ranges .emplace_back (StartAddr, EndAddr);
893+
894+ auto R = StartAddrToFuncRangeMap.emplace (StartAddr, FuncRange ());
895+ FuncRange &FRange = R.first ->second ;
896+
897+ FRange.Func = &Func;
898+ FRange.StartAddress = StartAddr;
899+ FRange.EndAddress = EndAddr;
900+
901+ } else if (SymName != Range->getFuncName ()) {
902+ // Function range already found from DWARF, but the symbol name from
903+ // symbol table is inconsistent with debug info. Log this discrepancy and
904+ // the alternative function GUID.
905+ if (ShowDetailedWarning)
906+ WithColor::warning ()
907+ << " Conflicting name for symbol " << Name << " with range ("
908+ << format (" %8" PRIx64, StartAddr) << " , "
909+ << format (" %8" PRIx64, EndAddr) << " )"
910+ << " , but the DWARF symbol " << Range->getFuncName ()
911+ << " indicates an overlapping range ("
912+ << format (" %8" PRIx64, Range->StartAddress ) << " , "
913+ << format (" %8" PRIx64, Range->EndAddress ) << " )\n " ;
914+
915+ assert (StartAddr == Range->StartAddress && EndAddr == Range->EndAddress &&
916+ " Mismatched function range" );
917+
918+ Range->Func ->NameStatus = DwarfNameStatus::Mismatch;
919+ AlternativeFunctionGUIDs.emplace (
920+ Range->Func , Function::getGUIDAssumingExternalLinkage (SymName));
921+
922+ } else if (StartAddr != Range->StartAddress &&
923+ EndAddr != Range->EndAddress ) {
924+ // Function already found in DWARF, but the address range from symbol
925+ // table conflicts/overlaps with the debug info.
926+ WithColor::warning () << " Conflicting range for symbol " << Name
927+ << " with range (" << format (" %8" PRIx64, StartAddr)
928+ << " , " << format (" %8" PRIx64, EndAddr) << " )"
929+ << " , but the DWARF symbol " << Range->getFuncName ()
930+ << " indicates another range ("
931+ << format (" %8" PRIx64, Range->StartAddress ) << " , "
932+ << format (" %8" PRIx64, Range->EndAddress ) << " )\n " ;
933+ }
934+ }
935+ }
936+
823937void ProfiledBinary::loadSymbolsFromDWARFUnit (DWARFUnit &CompilationUnit) {
824938 for (const auto &DieInfo : CompilationUnit.dies ()) {
825939 llvm::DWARFDie Die (&CompilationUnit, &DieInfo);
@@ -1034,6 +1148,58 @@ void ProfiledBinary::computeInlinedContextSizeForFunc(
10341148 }
10351149}
10361150
1151+ void ProfiledBinary::loadSymbolsFromPseudoProbe () {
1152+ if (!UsePseudoProbes)
1153+ return ;
1154+
1155+ const AddressProbesMap &Address2ProbesMap = getAddress2ProbesMap ();
1156+ for (auto *Func : ProfiledFunctions) {
1157+ if (Func->NameStatus != DwarfNameStatus::Mismatch)
1158+ continue ;
1159+ for (auto &[StartAddr, EndAddr] : Func->Ranges ) {
1160+ auto Range = findFuncRangeForStartAddr (StartAddr);
1161+ if (!Range->IsFuncEntry )
1162+ continue ;
1163+ const auto &Probe = Address2ProbesMap.find (StartAddr, EndAddr);
1164+ if (Probe.begin () != Probe.end ()) {
1165+ const MCDecodedPseudoProbeInlineTree *InlineTreeNode =
1166+ Probe.begin ()->get ().getInlineTreeNode ();
1167+ while (!InlineTreeNode->isTopLevelFunc ())
1168+ InlineTreeNode = static_cast <MCDecodedPseudoProbeInlineTree *>(
1169+ InlineTreeNode->Parent );
1170+
1171+ auto TopLevelProbes = InlineTreeNode->getProbes ();
1172+ auto TopProbe = TopLevelProbes.begin ();
1173+ assert (TopProbe != TopLevelProbes.end () &&
1174+ TopProbe->getAddress () >= StartAddr &&
1175+ TopProbe->getAddress () < EndAddr &&
1176+ " Top level pseudo probe does not match function range" );
1177+
1178+ const auto *ProbeDesc = getFuncDescForGUID (InlineTreeNode->Guid );
1179+ auto Ret = PseudoProbeNames.emplace (Func, ProbeDesc->FuncName );
1180+ if (!Ret.second && Ret.first ->second != ProbeDesc->FuncName &&
1181+ ShowDetailedWarning)
1182+ WithColor::warning ()
1183+ << " Mismatched pseudo probe names in function " << Func->FuncName
1184+ << " at range: (" << format (" %8" PRIx64, StartAddr) << " , "
1185+ << format (" %8" PRIx64, EndAddr) << " ). "
1186+ << " The previously found pseudo probe name is "
1187+ << Ret.first ->second << " but it conflicts with name "
1188+ << ProbeDesc->FuncName
1189+ << " This likely indicates a DWARF error that produces "
1190+ " conflicting symbols at the same starting address.\n " ;
1191+ }
1192+ }
1193+ }
1194+ }
1195+
1196+ StringRef ProfiledBinary::findPseudoProbeName (const BinaryFunction *Func) {
1197+ auto ProbeName = PseudoProbeNames.find (Func);
1198+ if (ProbeName == PseudoProbeNames.end ())
1199+ return StringRef ();
1200+ return ProbeName->second ;
1201+ }
1202+
10371203void ProfiledBinary::inferMissingFrames (
10381204 const SmallVectorImpl<uint64_t > &Context,
10391205 SmallVectorImpl<uint64_t > &NewContext) {
0 commit comments