@@ -840,6 +840,7 @@ void RewriteInstance::discoverFileObjects() {
840840 continue ;
841841
842842 if (cantFail (Symbol.getType ()) == SymbolRef::ST_File) {
843+ FileSymbols.emplace_back (Symbol);
843844 StringRef Name =
844845 cantFail (std::move (NameOrError), " cannot get symbol name for file" );
845846 // Ignore Clang LTO artificial FILE symbol as it is not always generated,
@@ -1340,6 +1341,7 @@ void RewriteInstance::discoverFileObjects() {
13401341 }
13411342
13421343 registerFragments ();
1344+ FileSymbols.clear ();
13431345}
13441346
13451347Error RewriteInstance::discoverRtFiniAddress () {
@@ -1417,50 +1419,139 @@ void RewriteInstance::registerFragments() {
14171419 if (!BC->HasSplitFunctions )
14181420 return ;
14191421
1422+ // Process fragments with ambiguous parents separately as they are typically a
1423+ // vanishing minority of cases and require expensive symbol table lookups.
1424+ std::vector<std::pair<StringRef, BinaryFunction *>> AmbiguousFragments;
14201425 for (auto &BFI : BC->getBinaryFunctions ()) {
14211426 BinaryFunction &Function = BFI.second ;
14221427 if (!Function.isFragment ())
14231428 continue ;
1424- unsigned ParentsFound = 0 ;
14251429 for (StringRef Name : Function.getNames ()) {
1426- StringRef BaseName, Suffix ;
1427- std::tie (BaseName, Suffix) = Name. split ( ' / ' ) ;
1430+ StringRef BaseName = NR. restore (Name) ;
1431+ const bool IsGlobal = BaseName == Name;
14281432 const size_t ColdSuffixPos = BaseName.find (" .cold" );
14291433 if (ColdSuffixPos == StringRef::npos)
14301434 continue ;
1431- // For cold function with local (foo.cold/1) symbol, prefer a parent with
1432- // local symbol as well (foo/1) over global symbol (foo).
1433- std::string ParentName = BaseName.substr (0 , ColdSuffixPos).str ();
1435+ StringRef ParentName = BaseName.substr (0 , ColdSuffixPos);
14341436 const BinaryData *BD = BC->getBinaryDataByName (ParentName);
1435- if (Suffix != " " ) {
1436- ParentName.append (Twine (" /" , Suffix).str ());
1437- const BinaryData *BDLocal = BC->getBinaryDataByName (ParentName);
1438- if (BDLocal || !BD)
1439- BD = BDLocal;
1440- }
1441- if (!BD) {
1442- if (opts::Verbosity >= 1 )
1443- BC->outs () << " BOLT-INFO: parent function not found for " << Name
1444- << " \n " ;
1437+ const uint64_t NumPossibleLocalParents =
1438+ NR.getUniquifiedNameCount (ParentName);
1439+ // The most common case: single local parent fragment.
1440+ if (!BD && NumPossibleLocalParents == 1 ) {
1441+ BD = BC->getBinaryDataByName (NR.getUniqueName (ParentName, 1 ));
1442+ } else if (BD && (!NumPossibleLocalParents || IsGlobal)) {
1443+ // Global parent and either no local candidates (second most common), or
1444+ // the fragment is global as well (uncommon).
1445+ } else {
1446+ // Any other case: need to disambiguate using FILE symbols.
1447+ AmbiguousFragments.emplace_back (ParentName, &Function);
14451448 continue ;
14461449 }
1447- const uint64_t Address = BD->getAddress ();
1448- BinaryFunction *BF = BC->getBinaryFunctionAtAddress (Address);
1449- if (!BF) {
1450- if (opts::Verbosity >= 1 )
1451- BC->outs () << formatv (
1452- " BOLT-INFO: parent function not found at {0:x}\n " , Address);
1453- continue ;
1450+ if (BD) {
1451+ BinaryFunction *BF = BC->getFunctionForSymbol (BD->getSymbol ());
1452+ if (BF) {
1453+ BC->registerFragment (Function, *BF);
1454+ continue ;
1455+ }
14541456 }
1455- BC->registerFragment (Function, *BF);
1456- ++ParentsFound;
1457- }
1458- if (!ParentsFound) {
14591457 BC->errs () << " BOLT-ERROR: parent function not found for " << Function
14601458 << ' \n ' ;
14611459 exit (1 );
14621460 }
14631461 }
1462+
1463+ if (AmbiguousFragments.empty ())
1464+ return ;
1465+
1466+ if (!BC->hasSymbolsWithFileName ()) {
1467+ BC->errs () << " BOLT-ERROR: input file has split functions but does not "
1468+ " have FILE symbols. If the binary was stripped, preserve "
1469+ " FILE symbols with --keep-file-symbols strip option" ;
1470+ exit (1 );
1471+ }
1472+
1473+ // The first global symbol is identified by the symbol table sh_info value.
1474+ // Used as local symbol search stopping point.
1475+ auto *ELF64LEFile = cast<ELF64LEObjectFile>(InputFile);
1476+ const ELFFile<ELF64LE> &Obj = ELF64LEFile->getELFFile ();
1477+ auto *SymTab = llvm::find_if (cantFail (Obj.sections ()), [](const auto &Sec) {
1478+ return Sec.sh_type == ELF::SHT_SYMTAB;
1479+ });
1480+ assert (SymTab);
1481+ if (!SymTab->sh_info ) {
1482+ BC->errs () << " BOLT-ERROR: malformed SYMTAB sh_info\n " ;
1483+ exit (1 );
1484+ }
1485+ ELFSymbolRef FirstGlobal = ELF64LEFile->toSymbolRef (SymTab, SymTab->sh_info );
1486+
1487+ for (auto &[ParentName, BF] : AmbiguousFragments) {
1488+ const uint64_t Address = BF->getAddress ();
1489+
1490+ // Get fragment's own symbol
1491+ const auto SymIt = FileSymRefs.find (Address);
1492+ if (SymIt == FileSymRefs.end ()) {
1493+ BC->errs ()
1494+ << " BOLT-ERROR: symbol lookup failed for function at address 0x"
1495+ << Twine::utohexstr (Address) << ' \n ' ;
1496+ exit (1 );
1497+ }
1498+
1499+ // Find containing FILE symbol
1500+ ELFSymbolRef Symbol = SymIt->second ;
1501+ auto FSI = llvm::upper_bound (FileSymbols, Symbol);
1502+ if (FSI == FileSymbols.begin ()) {
1503+ BC->errs () << " BOLT-ERROR: owning FILE symbol not found for symbol "
1504+ << cantFail (Symbol.getName ()) << ' \n ' ;
1505+ exit (1 );
1506+ }
1507+
1508+ ELFSymbolRef StopSymbol = FirstGlobal;
1509+ if (FSI != FileSymbols.end ())
1510+ StopSymbol = *FSI;
1511+
1512+ uint64_t ParentAddress{0 };
1513+
1514+ // BOLT split fragment symbols are emitted just before the main function
1515+ // symbol.
1516+ for (ELFSymbolRef NextSymbol = Symbol; NextSymbol < StopSymbol;
1517+ NextSymbol.moveNext ()) {
1518+ Expected<StringRef> NameOrError = Symbol.getName ();
1519+ if (!NameOrError)
1520+ break ;
1521+ StringRef Name = *NameOrError;
1522+ if (Name == ParentName) {
1523+ ParentAddress = cantFail (NextSymbol.getValue ());
1524+ goto registerParent;
1525+ }
1526+ if (Name.starts_with (ParentName))
1527+ // With multi-way splitting, there are multiple fragments with different
1528+ // suffixes. Parent follows the last fragment.
1529+ continue ;
1530+ break ;
1531+ }
1532+
1533+ // Iterate over local file symbols and check symbol names to match parent.
1534+ for (ELFSymbolRef Symbol (FSI[-1 ]); Symbol < StopSymbol; Symbol.moveNext ()) {
1535+ if (cantFail (Symbol.getName ()) == ParentName) {
1536+ ParentAddress = cantFail (Symbol.getAddress ());
1537+ break ;
1538+ }
1539+ }
1540+
1541+ registerParent:
1542+ // No local parent is found, use global parent function.
1543+ if (!ParentAddress)
1544+ if (BinaryData *ParentBD = BC->getBinaryDataByName (ParentName))
1545+ ParentAddress = ParentBD->getAddress ();
1546+
1547+ if (BinaryFunction *ParentBF =
1548+ BC->getBinaryFunctionAtAddress (ParentAddress)) {
1549+ BC->registerFragment (*BF, *ParentBF);
1550+ continue ;
1551+ }
1552+ BC->errs () << " BOLT-ERROR: parent function not found for " << *BF << ' \n ' ;
1553+ exit (1 );
1554+ }
14641555}
14651556
14661557void RewriteInstance::createPLTBinaryFunction (uint64_t TargetAddress,
0 commit comments