llvm · HighW4y2H3ll · Oct 9, 2025 · Oct 15, 2025 · Oct 15, 2025 · Oct 16, 2025
diff --git a/llvm/include/llvm/Object/ObjectFile.h b/llvm/include/llvm/Object/ObjectFile.h
@@ -198,6 +198,7 @@ class SymbolRef : public BasicSymbolRef {
   /// Get the alignment of this symbol as the actual value (not log 2).
   uint32_t getAlignment() const;
   uint64_t getCommonSize() const;
+  uint64_t getSize() const;
   Expected<SymbolRef::Type> getType() const;
 
   /// Get section this symbol is defined in reference to. Result is
@@ -482,6 +483,10 @@ inline uint64_t SymbolRef::getCommonSize() const {
   return getObject()->getCommonSymbolSize(getRawDataRefImpl());
 }
 
+inline uint64_t SymbolRef::getSize() const {
+  return getObject()->getCommonSymbolSizeImpl(getRawDataRefImpl());
 uint64_t COFFObjectFile::getCommonSymbolSizeImpl(DataRefImpl Ref) const { 
   COFFSymbolRef Symb = getCOFFSymbol(Ref); 
   return Symb.getValue(); 
 } 
 assert(*SymbolFlagsOrErr & SymbolRef::SF_Common); 
 uint64_t COFFObjectFile::getCommonSymbolSizeImpl(DataRefImpl Ref) const { 
   COFFSymbolRef Symb = getCOFFSymbol(Ref); 
   return Symb.getValue(); 
 } 
 assert(*SymbolFlagsOrErr & SymbolRef::SF_Common); 
+}
+
 inline Expected<section_iterator> SymbolRef::getSection() const {
   return getObject()->getSymbolSection(getRawDataRefImpl());
 }

diff --git a/llvm/include/llvm/ProfileData/SampleProf.h b/llvm/include/llvm/ProfileData/SampleProf.h
@@ -1214,13 +1214,18 @@ class FunctionSamples {
     // Note the sequence of the suffixes in the knownSuffixes array matters.
     // If suffix "A" is appended after the suffix "B", "A" should be in front
     // of "B" in knownSuffixes.
-    const char *KnownSuffixes[] = {LLVMSuffix, PartSuffix, UniqSuffix};
+    const char *KnownSuffixes[] = {LLVMSuffix, PartSuffix, UniqSuffix, nullptr};
 ArrayRef(const iterator_range<U *> &Range) 
 ArrayRef(const iterator_range<U *> &Range) 
+    return getCanonicalFnName(FnName, KnownSuffixes, Attr);
+  }
+
+  static StringRef getCanonicalFnName(StringRef FnName, const char *Suffixes[],
+                                      StringRef Attr = "selected") {
     if (Attr == "" || Attr == "all")
       return FnName.split('.').first;
     if (Attr == "selected") {
       StringRef Cand(FnName);
-      for (const auto &Suf : KnownSuffixes) {
-        StringRef Suffix(Suf);
+      for (const char **Suf = Suffixes; *Suf; Suf++) {
+        StringRef Suffix(*Suf);
         // If the profile contains ".__uniq." suffix, don't strip the
         // suffix for names in the IR.
         if (Suffix == UniqSuffix && FunctionSamples::HasUniqSuffix)

diff --git a/llvm/tools/llvm-profgen/ProfileGenerator.cpp b/llvm/tools/llvm-profgen/ProfileGenerator.cpp
@@ -449,29 +449,62 @@ bool ProfileGeneratorBase::collectFunctionsFromRawProfile(
   // Go through all the stacks, ranges and branches in sample counters, use
   // the start of the range to look up the function it belongs and record the
   // function.
+  uint64_t ErrStkAddr = 0, ErrFuncRange = 0, ErrSrc = 0, ErrTgt = 0;
+  uint64_t TotalStkAddr = 0, TotalFuncRange = 0, TotalSrc = 0, TotalTgt = 0;
   for (const auto &CI : *SampleCounters) {
     if (const auto *CtxKey = dyn_cast<AddrBasedCtxKey>(CI.first.getPtr())) {
       for (auto StackAddr : CtxKey->Context) {
+        uint64_t Inc = Binary->addressIsCode(StackAddr) ? 1 : 0;
+        TotalStkAddr += Inc;
         if (FuncRange *FRange = Binary->findFuncRange(StackAddr))
           ProfiledFunctions.insert(FRange->Func);
+        else
+          ErrStkAddr += Inc;
       }
     }
 
     for (auto Item : CI.second.RangeCounter) {
       uint64_t StartAddress = Item.first.first;
+      uint64_t Inc = Binary->addressIsCode(StartAddress) ? Item.second : 0;
+      TotalFuncRange += Inc;
       if (FuncRange *FRange = Binary->findFuncRange(StartAddress))
         ProfiledFunctions.insert(FRange->Func);
+      else
+        ErrFuncRange += Inc;
     }
 
     for (auto Item : CI.second.BranchCounter) {
       uint64_t SourceAddress = Item.first.first;
       uint64_t TargetAddress = Item.first.second;
+      uint64_t SrcInc = Binary->addressIsCode(SourceAddress) ? Item.second : 0;
+      uint64_t TgtInc = Binary->addressIsCode(TargetAddress) ? Item.second : 0;
+      TotalSrc += SrcInc;
       if (FuncRange *FRange = Binary->findFuncRange(SourceAddress))
         ProfiledFunctions.insert(FRange->Func);
+      else
+        ErrSrc += SrcInc;
+      TotalTgt += TgtInc;
       if (FuncRange *FRange = Binary->findFuncRange(TargetAddress))
         ProfiledFunctions.insert(FRange->Func);
+      else
+        ErrTgt += TgtInc;
     }
   }
+
+  if (ErrStkAddr)
+    emitWarningSummary(
+        ErrStkAddr, TotalStkAddr,
+        "of stack address samples do not belong to any function");
+  if (ErrFuncRange)
+    emitWarningSummary(
+        ErrFuncRange, TotalFuncRange,
+        "of function range samples do not belong to any function");
+  if (ErrSrc)
+    emitWarningSummary(ErrSrc, TotalSrc,
+                       "of LBR source samples do not belong to any function");
+  if (ErrTgt)
+    emitWarningSummary(ErrTgt, TotalTgt,
+                       "of LBR target samples do not belong to any function");
   return true;
 }
 

diff --git a/llvm/tools/llvm-profgen/ProfiledBinary.cpp b/llvm/tools/llvm-profgen/ProfiledBinary.cpp
@@ -257,6 +257,8 @@ void ProfiledBinary::load() {
   if (ShowDisassemblyOnly)
     decodePseudoProbe(Obj);
 
+  populateSymbolsFromBinary(Obj);
+
   // Disassemble the text sections.
   disassemble(Obj);
 
@@ -604,13 +606,13 @@ bool ProfiledBinary::dissassembleSymbol(std::size_t SI, ArrayRef<uint8_t> Bytes,
       // Record potential call targets for tail frame inference later-on.
       if (InferMissingFrames && FRange) {
         uint64_t Target = 0;
-        MIA->evaluateBranch(Inst, Address, Size, Target);
+        bool Err = MIA->evaluateBranch(Inst, Address, Size, Target);
         if (MCDesc.isCall()) {
           // Indirect call targets are unknown at this point. Recording the
           // unknown target (zero) for further LBR-based refinement.
           MissingContextInferrer->CallEdges[Address].insert(Target);
         } else if (MCDesc.isUnconditionalBranch()) {
-          assert(Target &&
+          assert(Err &&
                  "target should be known for unconditional direct branch");
           // Any inter-function unconditional jump is considered tail call at
           // this point. This is not 100% accurate and could further be
@@ -820,6 +822,48 @@ void ProfiledBinary::populateSymbolAddressList(const ObjectFile *Obj) {
   }
 }
 
+void ProfiledBinary::populateSymbolsFromBinary(const ObjectFile *Obj) {
+  // Load binary functions from symbol table when Debug info is incomplete
+  StringRef FileName = Obj->getFileName();
+  for (const SymbolRef &Symbol : Obj->symbols()) {
+    const SymbolRef::Type Type = unwrapOrError(Symbol.getType(), FileName);
+    const uint64_t Addr = unwrapOrError(Symbol.getAddress(), FileName);
+    const StringRef Name = unwrapOrError(Symbol.getName(), FileName);
+    const uint64_t Size = Symbol.getSize();
+
+    if (Size == 0 || Type != SymbolRef::ST_Function)
+      continue;
+
+    const char *Suffixes[] = {".destroy", ".resume", ".llvm.",
+                              ".cold",    ".warm",   nullptr};
+    const StringRef SymName =
+        FunctionSamples::getCanonicalFnName(Name, Suffixes);
+
+    auto Ret = BinaryFunctions.emplace(SymName, BinaryFunction());
+    auto &Func = Ret.first->second;
+    if (Ret.second)
+      Func.FuncName = Ret.first->first;
+
+    if (auto Range = findFuncRange(Addr)) {
+      if (Ret.second && ShowDetailedWarning)
+        WithColor::warning()
 auto Ret = BinaryFunctions.emplace(SymName, BinaryFunction()); 
 auto Ret = BinaryFunctions.emplace(SymName, BinaryFunction()); 
+            << "Symbol " << Name << " start address "
+            << format("%8" PRIx64, Addr) << " already exists in DWARF at "
+            << format("%8" PRIx64, Range->StartAddress) << " in function "
+            << Range->getFuncName() << "\n";
+    } else {
+      // Store/Update Function Range from SymTab
+      Func.Ranges.emplace_back(Addr, Addr + Size);
+
+      auto R = StartAddrToFuncRangeMap.emplace(Addr, FuncRange());
+      FuncRange &FRange = R.first->second;
+      FRange.Func = &Func;
+      FRange.StartAddress = Addr;
+      FRange.EndAddress = Addr + Size;
+    }
+  }
+}
+
 void ProfiledBinary::loadSymbolsFromDWARFUnit(DWARFUnit &CompilationUnit) {
   for (const auto &DieInfo : CompilationUnit.dies()) {
     llvm::DWARFDie Die(&CompilationUnit, &DieInfo);

diff --git a/llvm/tools/llvm-profgen/ProfiledBinary.h b/llvm/tools/llvm-profgen/ProfiledBinary.h
@@ -356,6 +356,9 @@ class ProfiledBinary {
   // Create symbol to its start address mapping.
   void populateSymbolAddressList(const object::ObjectFile *O);
 
+  // Load functions from its symbol table (when DWARF info is missing).
+  void populateSymbolsFromBinary(const object::ObjectFile *O);
+
   // A function may be spilt into multiple non-continuous address ranges. We use
   // this to set whether start a function range is the real entry of the
   // function and also set false to the non-function label.