From cf8f7b8fca2d4910425fe39cddd997652b444d58 Mon Sep 17 00:00:00 2001 From: Augusto Noronha Date: Thu, 7 Aug 2025 15:32:49 -0700 Subject: [PATCH 1/4] Reland "[NFC][lldb] Speed up lookup of shared modules" (229d860) Original commit message: By profiling LLDB debugging a Swift application without a dSYM and a large number of .o files, I identified that querying shared modules was the biggest bottleneck when running "frame variable", and Clang types need to be searched. One of the reasons for that slowness is that the shared module list can grow very large, and the search through it is O(n). To solve this issue, this patch adds a new hashmap to the shared module list whose key is the name of the module, and the value is all the modules that share that name. This should speed up any search where the query contains the module name. rdar://156753350 --- lldb/source/Core/ModuleList.cpp | 229 ++++++++++++++++++++++++++++++++ 1 file changed, 229 insertions(+) diff --git a/lldb/source/Core/ModuleList.cpp b/lldb/source/Core/ModuleList.cpp index d5ddc2b249e56..bb732a28eddc9 100644 --- a/lldb/source/Core/ModuleList.cpp +++ b/lldb/source/Core/ModuleList.cpp @@ -755,6 +755,235 @@ size_t ModuleList::GetIndexForModule(const Module *module) const { } namespace { +/// A wrapper around ModuleList for shared modules. Provides fast lookups for +/// file-based ModuleSpec queries. +class SharedModuleList { +public: + /// Finds all the modules matching the module_spec, and adds them to \p + /// matching_module_list. + void FindModules(const ModuleSpec &module_spec, + ModuleList &matching_module_list) const { + std::lock_guard guard(GetMutex()); + // Try map first for performance - if found, skip expensive full list + // search. 
+ if (FindModulesInMap(module_spec, matching_module_list)) + return; + m_list.FindModules(module_spec, matching_module_list); + // Assert that modules were found in the list but not the map, it's + // because the module_spec has no filename or the found module has a + // different filename. For example, when searching by UUID and finding a + // module with an alias. + assert((matching_module_list.IsEmpty() || + module_spec.GetFileSpec().GetFilename().IsEmpty() || + module_spec.GetFileSpec().GetFilename() != + matching_module_list.GetModuleAtIndex(0) + ->GetFileSpec() + .GetFilename()) && + "Search by name not found in SharedModuleList's map"); + } + + ModuleSP FindModule(const Module *module_ptr) { + if (!module_ptr) + return ModuleSP(); + + std::lock_guard guard(GetMutex()); + if (ModuleSP result = FindModuleInMap(module_ptr)) + return result; + return m_list.FindModule(module_ptr); + } + + // UUID searches bypass map since UUIDs aren't indexed by filename. + ModuleSP FindModule(const UUID &uuid) const { + return m_list.FindModule(uuid); + } + + void Append(const ModuleSP &module_sp, bool use_notifier) { + if (!module_sp) + return; + std::lock_guard guard(GetMutex()); + m_list.Append(module_sp, use_notifier); + AddToMap(module_sp); + } + + size_t RemoveOrphans(bool mandatory) { + std::unique_lock lock(GetMutex(), std::defer_lock); + if (mandatory) { + lock.lock(); + } else { + if (!lock.try_lock()) + return 0; + } + size_t total_count = 0; + size_t run_count; + do { + // Remove indexed orphans first, then remove non-indexed orphans. This + // order is important because the shared count will be different if a + // module is indexed or not. + run_count = RemoveOrphansFromMapAndList(); + run_count += m_list.RemoveOrphans(mandatory); + total_count += run_count; + // Because removing orphans might make new orphans, remove from both + // containers until a fixed-point is reached. 
+ } while (run_count != 0); + + return total_count; + } + + bool Remove(const ModuleSP &module_sp, bool use_notifier = true) { + if (!module_sp) + return false; + std::lock_guard guard(GetMutex()); + RemoveFromMap(module_sp.get()); + return m_list.Remove(module_sp, use_notifier); + } + + void ReplaceEquivalent(const ModuleSP &module_sp, + llvm::SmallVectorImpl *old_modules) { + std::lock_guard guard(GetMutex()); + m_list.ReplaceEquivalent(module_sp, old_modules); + ReplaceEquivalentInMap(module_sp); + } + + bool RemoveIfOrphaned(const Module *module_ptr) { + std::lock_guard guard(GetMutex()); + RemoveFromMap(module_ptr, /*if_orphaned=*/true); + return m_list.RemoveIfOrphaned(module_ptr); + } + + std::recursive_mutex &GetMutex() const { return m_list.GetMutex(); } + +private: + ModuleSP FindModuleInMap(const Module *module_ptr) const { + if (!module_ptr->GetFileSpec().GetFilename()) + return ModuleSP(); + ConstString name = module_ptr->GetFileSpec().GetFilename(); + auto it = m_name_to_modules.find(name); + if (it == m_name_to_modules.end()) + return ModuleSP(); + const llvm::SmallVectorImpl &vector = it->second; + for (const ModuleSP &module_sp : vector) { + if (module_sp.get() == module_ptr) + return module_sp; + } + return ModuleSP(); + } + + bool FindModulesInMap(const ModuleSpec &module_spec, + ModuleList &matching_module_list) const { + auto it = m_name_to_modules.find(module_spec.GetFileSpec().GetFilename()); + if (it == m_name_to_modules.end()) + return false; + const llvm::SmallVectorImpl &vector = it->second; + bool found = false; + for (const ModuleSP &module_sp : vector) { + if (module_sp->MatchesModuleSpec(module_spec)) { + matching_module_list.Append(module_sp); + found = true; + } + } + return found; + } + + void AddToMap(const ModuleSP &module_sp) { + ConstString name = module_sp->GetFileSpec().GetFilename(); + if (name.IsEmpty()) + return; + m_name_to_modules[name].push_back(module_sp); + } + + void RemoveFromMap(const Module *module_ptr, bool 
if_orphaned = false) { + if (!module_ptr) + return; + ConstString name = module_ptr->GetFileSpec().GetFilename(); + if (!m_name_to_modules.contains(name)) + return; + llvm::SmallVectorImpl &vec = m_name_to_modules[name]; + for (auto *it = vec.begin(); it != vec.end(); ++it) { + if (it->get() == module_ptr) { + // use_count == 2 means only held by map and list (orphaned). + constexpr long kUseCountOrphaned = 2; + if (!if_orphaned || it->use_count() == kUseCountOrphaned) { + vec.erase(it); + break; + } + } + } + } + + void ReplaceEquivalentInMap(const ModuleSP &module_sp) { + RemoveEquivalentModulesFromMap(module_sp); + AddToMap(module_sp); + } + + void RemoveEquivalentModulesFromMap(const ModuleSP &module_sp) { + ConstString name = module_sp->GetFileSpec().GetFilename(); + if (name.IsEmpty()) + return; + + auto it = m_name_to_modules.find(name); + if (it == m_name_to_modules.end()) + return; + + // First remove any equivalent modules. Equivalent modules are modules + // whose path, platform path and architecture match. + ModuleSpec equivalent_module_spec(module_sp->GetFileSpec(), + module_sp->GetArchitecture()); + equivalent_module_spec.GetPlatformFileSpec() = + module_sp->GetPlatformFileSpec(); + + llvm::SmallVectorImpl &vec = it->second; + llvm::erase_if(vec, [&equivalent_module_spec](ModuleSP &element) { + return element->MatchesModuleSpec(equivalent_module_spec); + }); + } + + /// Remove orphans from the vector and return the removed modules. + ModuleList RemoveOrphansFromVector(llvm::SmallVectorImpl &vec) { + ModuleList to_remove; + for (int i = vec.size() - 1; i >= 0; --i) { + ModuleSP module = vec[i]; + constexpr long kUseCountOrphaned = 2; + constexpr long kUseCountLocalVariable = 1; + // use_count == 3: map + list + local variable = orphaned. 
+ if (module.use_count() == kUseCountOrphaned + kUseCountLocalVariable) { + to_remove.Append(module); + vec.erase(vec.begin() + i); + } + } + return to_remove; + } + + /// Remove orphans that exist in both the map and list. This does not remove + /// any orphans that exist exclusively on the list. + /// + /// The mutex must be locked by the caller. + int RemoveOrphansFromMapAndList() { + // Modules might hold shared pointers to other modules, so removing one + // module might orphan other modules. Keep removing modules until + // there are no further modules that can be removed. + bool made_progress = true; + int remove_count = 0; + while (made_progress) { + made_progress = false; + for (auto &[name, vec] : m_name_to_modules) { + if (vec.empty()) + continue; + ModuleList to_remove = RemoveOrphansFromVector(vec); + remove_count += to_remove.GetSize(); + made_progress = !to_remove.IsEmpty(); + m_list.Remove(to_remove); + } + } + return remove_count; + } + + ModuleList m_list; + + /// A hash map from a module's filename to all the modules that share that + /// filename, for fast module lookups by name. + llvm::DenseMap> m_name_to_modules; +}; + struct SharedModuleListInfo { ModuleList module_list; ModuleListProperties module_list_properties; From d09c3330f6877ab1729dd8e07f36717825f2e454 Mon Sep 17 00:00:00 2001 From: Augusto Noronha Date: Fri, 8 Aug 2025 10:27:32 -0700 Subject: [PATCH 2/4] fixup! Reland "[NFC][lldb] Speed up lookup of shared modules" (229d860) --- lldb/source/Core/ModuleList.cpp | 49 ++++++++++++++++----------------- 1 file changed, 24 insertions(+), 25 deletions(-) diff --git a/lldb/source/Core/ModuleList.cpp b/lldb/source/Core/ModuleList.cpp index bb732a28eddc9..e507b2e8eb68a 100644 --- a/lldb/source/Core/ModuleList.cpp +++ b/lldb/source/Core/ModuleList.cpp @@ -766,7 +766,8 @@ class SharedModuleList { std::lock_guard guard(GetMutex()); // Try map first for performance - if found, skip expensive full list // search. 
- if (FindModulesInMap(module_spec, matching_module_list)) + FindModulesInMap(module_spec, matching_module_list); + if (!matching_module_list.IsEmpty()) return; m_list.FindModules(module_spec, matching_module_list); // Assert that modules were found in the list but not the map, it's @@ -868,20 +869,16 @@ class SharedModuleList { return ModuleSP(); } - bool FindModulesInMap(const ModuleSpec &module_spec, + void FindModulesInMap(const ModuleSpec &module_spec, ModuleList &matching_module_list) const { auto it = m_name_to_modules.find(module_spec.GetFileSpec().GetFilename()); if (it == m_name_to_modules.end()) - return false; + return; const llvm::SmallVectorImpl &vector = it->second; - bool found = false; for (const ModuleSP &module_sp : vector) { - if (module_sp->MatchesModuleSpec(module_spec)) { + if (module_sp->MatchesModuleSpec(module_spec)) matching_module_list.Append(module_sp); - found = true; - } } - return found; } void AddToMap(const ModuleSP &module_sp) { @@ -900,8 +897,6 @@ class SharedModuleList { llvm::SmallVectorImpl &vec = m_name_to_modules[name]; for (auto *it = vec.begin(); it != vec.end(); ++it) { if (it->get() == module_ptr) { - // use_count == 2 means only held by map and list (orphaned). - constexpr long kUseCountOrphaned = 2; if (!if_orphaned || it->use_count() == kUseCountOrphaned) { vec.erase(it); break; @@ -939,17 +934,17 @@ class SharedModuleList { /// Remove orphans from the vector and return the removed modules. ModuleList RemoveOrphansFromVector(llvm::SmallVectorImpl &vec) { + // remove_if moves the elements that match the condition to the end of the + // container, and returns an iterator to the first element that was moved. 
+ auto *to_remove_start = llvm::remove_if(vec, [](const ModuleSP &module) { + return module.use_count() == kUseCountOrphaned; + }); + ModuleList to_remove; - for (int i = vec.size() - 1; i >= 0; --i) { - ModuleSP module = vec[i]; - constexpr long kUseCountOrphaned = 2; - constexpr long kUseCountLocalVariable = 1; - // use_count == 3: map + list + local variable = orphaned. - if (module.use_count() == kUseCountOrphaned + kUseCountLocalVariable) { - to_remove.Append(module); - vec.erase(vec.begin() + i); - } - } + for (ModuleSP *it = to_remove_start; it != vec.end(); ++it) + to_remove.Append(*it); + + vec.erase(to_remove_start, vec.end()); return to_remove; } @@ -961,19 +956,20 @@ class SharedModuleList { // Modules might hold shared pointers to other modules, so removing one // module might orphan other modules. Keep removing modules until // there are no further modules that can be removed. - bool made_progress = true; int remove_count = 0; - while (made_progress) { - made_progress = false; + int previous_remove_count; + do { + previous_remove_count = remove_count; for (auto &[name, vec] : m_name_to_modules) { if (vec.empty()) continue; ModuleList to_remove = RemoveOrphansFromVector(vec); remove_count += to_remove.GetSize(); - made_progress = !to_remove.IsEmpty(); m_list.Remove(to_remove); } - } + // Break when fixed-point is reached. + } while (previous_remove_count != remove_count); + return remove_count; } @@ -982,6 +978,9 @@ class SharedModuleList { /// A hash map from a module's filename to all the modules that share that /// filename, for fast module lookups by name. llvm::DenseMap> m_name_to_modules; + + /// The use count of a module held only by m_list and m_name_to_modules. + static constexpr long kUseCountOrphaned = 2; }; struct SharedModuleListInfo { From 6d5d8f5a313f5216e33e4be227e7362943fb3f81 Mon Sep 17 00:00:00 2001 From: Augusto Noronha Date: Fri, 8 Aug 2025 10:34:36 -0700 Subject: [PATCH 3/4] fixup! 
Reland "[NFC][lldb] Speed up lookup of shared modules" (229d860) --- lldb/source/Core/ModuleList.cpp | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) diff --git a/lldb/source/Core/ModuleList.cpp b/lldb/source/Core/ModuleList.cpp index e507b2e8eb68a..56b2d7a6669ea 100644 --- a/lldb/source/Core/ModuleList.cpp +++ b/lldb/source/Core/ModuleList.cpp @@ -967,8 +967,8 @@ class SharedModuleList { remove_count += to_remove.GetSize(); m_list.Remove(to_remove); } - // Break when fixed-point is reached. - } while (previous_remove_count != remove_count); + // Break when fixed-point is reached. + } while (previous_remove_count != remove_count); return remove_count; } @@ -978,7 +978,7 @@ class SharedModuleList { /// A hash map from a module's filename to all the modules that share that /// filename, for fast module lookups by name. llvm::DenseMap> m_name_to_modules; - + /// The use count of a module held only by m_list and m_name_to_modules. static constexpr long kUseCountOrphaned = 2; }; From be753835542a6c026e2701c9fe0baa41b17ce83b Mon Sep 17 00:00:00 2001 From: Augusto Noronha Date: Tue, 12 Aug 2025 12:16:48 -0700 Subject: [PATCH 4/4] fixup! 
Reland "[NFC][lldb] Speed up lookup of shared modules" (229d860) --- lldb/source/Core/ModuleList.cpp | 28 ++++++++++++---------------- 1 file changed, 12 insertions(+), 16 deletions(-) diff --git a/lldb/source/Core/ModuleList.cpp b/lldb/source/Core/ModuleList.cpp index 56b2d7a6669ea..bc63a41c90d17 100644 --- a/lldb/source/Core/ModuleList.cpp +++ b/lldb/source/Core/ModuleList.cpp @@ -783,14 +783,12 @@ class SharedModuleList { "Search by name not found in SharedModuleList's map"); } - ModuleSP FindModule(const Module *module_ptr) { - if (!module_ptr) - return ModuleSP(); + ModuleSP FindModule(const Module &module) { std::lock_guard guard(GetMutex()); - if (ModuleSP result = FindModuleInMap(module_ptr)) + if (ModuleSP result = FindModuleInMap(module)) return result; - return m_list.FindModule(module_ptr); + return m_list.FindModule(&module); } // UUID searches bypass map since UUIDs aren't indexed by filename. @@ -834,7 +832,7 @@ class SharedModuleList { if (!module_sp) return false; std::lock_guard guard(GetMutex()); - RemoveFromMap(module_sp.get()); + RemoveFromMap(*module_sp.get()); return m_list.Remove(module_sp, use_notifier); } @@ -847,23 +845,23 @@ class SharedModuleList { bool RemoveIfOrphaned(const Module *module_ptr) { std::lock_guard guard(GetMutex()); - RemoveFromMap(module_ptr, /*if_orphaned=*/true); + RemoveFromMap(*module_ptr, /*if_orphaned=*/true); return m_list.RemoveIfOrphaned(module_ptr); } std::recursive_mutex &GetMutex() const { return m_list.GetMutex(); } private: - ModuleSP FindModuleInMap(const Module *module_ptr) const { - if (!module_ptr->GetFileSpec().GetFilename()) + ModuleSP FindModuleInMap(const Module &module) const { + if (!module.GetFileSpec().GetFilename()) return ModuleSP(); - ConstString name = module_ptr->GetFileSpec().GetFilename(); + ConstString name = module.GetFileSpec().GetFilename(); auto it = m_name_to_modules.find(name); if (it == m_name_to_modules.end()) return ModuleSP(); const llvm::SmallVectorImpl &vector = it->second; 
for (const ModuleSP &module_sp : vector) { - if (module_sp.get() == module_ptr) + if (module_sp.get() == &module) return module_sp; } return ModuleSP(); @@ -888,15 +886,13 @@ class SharedModuleList { m_name_to_modules[name].push_back(module_sp); } - void RemoveFromMap(const Module *module_ptr, bool if_orphaned = false) { - if (!module_ptr) - return; - ConstString name = module_ptr->GetFileSpec().GetFilename(); + void RemoveFromMap(const Module &module, bool if_orphaned = false) { + ConstString name = module.GetFileSpec().GetFilename(); if (!m_name_to_modules.contains(name)) return; llvm::SmallVectorImpl &vec = m_name_to_modules[name]; for (auto *it = vec.begin(); it != vec.end(); ++it) { - if (it->get() == module_ptr) { + if (it->get() == &module) { if (!if_orphaned || it->use_count() == kUseCountOrphaned) { vec.erase(it); break;