From a9209b426ff1da560a1573cc9c11565397c5ce00 Mon Sep 17 00:00:00 2001 From: Alex Langford Date: Thu, 28 Aug 2025 13:30:20 -0700 Subject: [PATCH] [lldb] Delay creation of ConstStrings when parsing MachO symtab This change is motivated by performance. Creating ConstStrings can be expensive and is a non-trivial amount of ObjectFileMachO::ParseSymtab. This patch reduces the number of ConstStrings created. LLDB relies on duplicate symbol information being merged into one Symbol object. It does this by merging duplicate symbols together by name as they're created. However, before it performs the merge, it will create a ConstString for the symbol name. If this step is delayed until after symbols are merged, the number of ConstStrings created can be reduced considerably. I measured the performance difference by profiling a Release build of LLDB as it attaches to a Debug build of LLDB. This reduced the total time of ObjectFileMachO::ParseSymtab by 100-150ms. --- .../ObjectFile/Mach-O/ObjectFileMachO.cpp | 65 +++++++++---------- 1 file changed, 31 insertions(+), 34 deletions(-) diff --git a/lldb/source/Plugins/ObjectFile/Mach-O/ObjectFileMachO.cpp b/lldb/source/Plugins/ObjectFile/Mach-O/ObjectFileMachO.cpp index 924e34053d411..40b796193e70f 100644 --- a/lldb/source/Plugins/ObjectFile/Mach-O/ObjectFileMachO.cpp +++ b/lldb/source/Plugins/ObjectFile/Mach-O/ObjectFileMachO.cpp @@ -2584,7 +2584,7 @@ void ObjectFileMachO::ParseSymtab(Symtab &symtab) { std::vector N_COMM_indexes; typedef std::multimap ValueToSymbolIndexMap; typedef llvm::DenseMap NListIndexToSymbolIndexMap; - typedef llvm::DenseMap ConstNameToSymbolIndexMap; + typedef llvm::StringMap ConstNameToSymbolIndexMap; ValueToSymbolIndexMap N_FUN_addr_to_sym_idx; ValueToSymbolIndexMap N_STSYM_addr_to_sym_idx; ConstNameToSymbolIndexMap N_GSYM_name_to_sym_idx; @@ -4210,37 +4210,20 @@ void ObjectFileMachO::ParseSymtab(Symtab &symtab) { uint64_t symbol_value = nlist.n_value; - if (symbol_name_non_abi_mangled) { - sym[sym_idx].GetMangled().SetMangledName( - ConstString(symbol_name_non_abi_mangled)); - sym[sym_idx].GetMangled().SetDemangledName(ConstString(symbol_name)); - } else { - - if (symbol_name && symbol_name[0] == '_') { - symbol_name++; // Skip the leading underscore - } - - if (symbol_name) { - ConstString const_symbol_name(symbol_name); - sym[sym_idx].GetMangled().SetValue(const_symbol_name); - } - } - - if (is_gsym) { - const char *gsym_name = sym[sym_idx] - .GetMangled() - .GetName(Mangled::ePreferMangled) - .GetCString(); - if (gsym_name) - N_GSYM_name_to_sym_idx[gsym_name] = sym_idx; - } - if (symbol_section) { const addr_t section_file_addr = symbol_section->GetFileAddress(); symbol_value -= section_file_addr; } + if (!symbol_name_non_abi_mangled && symbol_name && + symbol_name[0] == '_') { + symbol_name++; // Skip the leading underscore + } + if (!is_debug) { + const char *name_to_compare = symbol_name_non_abi_mangled + ? symbol_name_non_abi_mangled + : symbol_name; if (type == eSymbolTypeCode) { // See if we can find a N_FUN entry for any code symbols. If we do // find a match, and the name matches, then we can merge the two into @@ -4253,7 +4236,7 @@ void ObjectFileMachO::ParseSymtab(Symtab &symtab) { if (range.first != range.second) { for (ValueToSymbolIndexMap::const_iterator pos = range.first; pos != range.second; ++pos) { - if (sym[sym_idx].GetMangled().GetName(Mangled::ePreferMangled) == + if (llvm::StringRef(name_to_compare) == sym[pos->second].GetMangled().GetName( Mangled::ePreferMangled)) { m_nlist_idx_to_sym_idx[nlist_idx] = pos->second; @@ -4288,7 +4271,7 @@ void ObjectFileMachO::ParseSymtab(Symtab &symtab) { if (range.first != range.second) { for (ValueToSymbolIndexMap::const_iterator pos = range.first; pos != range.second; ++pos) { - if (sym[sym_idx].GetMangled().GetName(Mangled::ePreferMangled) == + if (llvm::StringRef(name_to_compare) == sym[pos->second].GetMangled().GetName( Mangled::ePreferMangled)) { m_nlist_idx_to_sym_idx[nlist_idx] = pos->second; @@ -4303,13 +4286,9 @@ void ObjectFileMachO::ParseSymtab(Symtab &symtab) { } } else { // Combine N_GSYM stab entries with the non stab symbol. - const char *gsym_name = sym[sym_idx] - .GetMangled() - .GetName(Mangled::ePreferMangled) - .GetCString(); - if (gsym_name) { + if (name_to_compare) { ConstNameToSymbolIndexMap::const_iterator pos = - N_GSYM_name_to_sym_idx.find(gsym_name); + N_GSYM_name_to_sym_idx.find(name_to_compare); if (pos != N_GSYM_name_to_sym_idx.end()) { const uint32_t GSYM_sym_idx = pos->second; m_nlist_idx_to_sym_idx[nlist_idx] = GSYM_sym_idx; @@ -4331,6 +4310,24 @@ void ObjectFileMachO::ParseSymtab(Symtab &symtab) { } } + if (symbol_name_non_abi_mangled) { + sym[sym_idx].GetMangled().SetMangledName( + ConstString(symbol_name_non_abi_mangled)); + sym[sym_idx].GetMangled().SetDemangledName(ConstString(symbol_name)); + } else if (symbol_name) { + ConstString const_symbol_name(symbol_name); + sym[sym_idx].GetMangled().SetValue(const_symbol_name); + } + + if (is_gsym) { + const char *gsym_name = sym[sym_idx] + .GetMangled() + .GetName(Mangled::ePreferMangled) + .GetCString(); + if (gsym_name) + N_GSYM_name_to_sym_idx[gsym_name] = sym_idx; + } + sym[sym_idx].SetID(nlist_idx); sym[sym_idx].SetType(type); if (set_value) {