Skip to content
Merged
Show file tree
Hide file tree
Changes from 3 commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
6 changes: 5 additions & 1 deletion lldb/include/lldb/Symbol/SymbolFile.h
Original file line number Diff line number Diff line change
Expand Up @@ -467,8 +467,12 @@ class SymbolFile : public PluginInterface {
/// If true, then only return separate debug info files that encountered
/// errors during loading. If false, then return all expected separate
/// debug info files, regardless of whether they were successfully loaded.
/// \param load_all_debug_info
/// If true, force loading of any symbol files that are not yet loaded and
/// add their info to the debug info files. Defaults to true.
virtual bool GetSeparateDebugInfo(StructuredData::Dictionary &d,
                                  bool errors_only,
                                  bool load_all_debug_info = true) {
  // Base implementation: nothing is written to \p d and false is returned.
  // NOTE: default arguments are selected by the static type of the call
  // expression, so overrides should repeat the same `= true` default to
  // behave consistently when called through a derived-class pointer.
  return false;
}

Expand Down
7 changes: 4 additions & 3 deletions lldb/include/lldb/Symbol/SymbolFileOnDemand.h
Original file line number Diff line number Diff line change
Expand Up @@ -223,9 +223,10 @@ class SymbolFileOnDemand : public lldb_private::SymbolFile {
return m_sym_file_impl->SetDebugInfoHadFrameVariableErrors();
}

/// Forwards the request, including \p load_all_debug_info, unchanged to the
/// wrapped symbol file implementation and returns its result.
bool GetSeparateDebugInfo(StructuredData::Dictionary &d, bool errors_only,
                          bool load_all_debug_info = true) override {
  return m_sym_file_impl->GetSeparateDebugInfo(d, errors_only,
                                               load_all_debug_info);
}

lldb::TypeSP MakeType(lldb::user_id_t uid, ConstString name,
Expand Down
26 changes: 19 additions & 7 deletions lldb/packages/Python/lldbsuite/test/builders/builder.py
Original file line number Diff line number Diff line change
Expand Up @@ -247,13 +247,25 @@ def getLLDBObjRoot(self):
def _getDebugInfoArgs(self, debug_info):
if debug_info is None:
return []
if debug_info == "dwarf":
return ["MAKE_DSYM=NO"]
if debug_info == "dwo":
return ["MAKE_DSYM=NO", "MAKE_DWO=YES"]
if debug_info == "gmodules":
return ["MAKE_DSYM=NO", "MAKE_GMODULES=YES"]
return None

debug_options = debug_info if isinstance(debug_info, list) else [debug_info]
option_flags = {
"dwarf": {"MAKE_DSYM": "NO"},
"dwo": {"MAKE_DSYM": "NO", "MAKE_DWO": "YES"},
"gmodules": {"MAKE_DSYM": "NO", "MAKE_GMODULES": "YES"},
"debug_names": {"MAKE_DEBUG_NAMES": "YES"},
"dwp": {"MAKE_DSYM": "NO", "MAKE_DWP": "YES"},
}

# Collect all flags, with later options overriding earlier ones
flags = {}

for option in debug_options:
if not option or option not in option_flags:
return None # Invalid options
flags.update(option_flags[option])

return [f"{key}={value}" for key, value in flags.items()]

def getBuildCommand(
self,
Expand Down
4 changes: 4 additions & 0 deletions lldb/packages/Python/lldbsuite/test/make/Makefile.rules
Original file line number Diff line number Diff line change
Expand Up @@ -276,6 +276,10 @@ ifeq "$(MAKE_DWO)" "YES"
CFLAGS += -gsplit-dwarf
endif

ifeq "$(MAKE_DEBUG_NAMES)" "YES"
CFLAGS += -gpubnames
endif

ifeq "$(USE_PRIVATE_MODULE_CACHE)" "YES"
THE_CLANG_MODULE_CACHE_DIR := $(BUILDDIR)/private-module-cache
else
Expand Down
5 changes: 3 additions & 2 deletions lldb/source/Plugins/SymbolFile/DWARF/SymbolFileDWARF.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -4139,7 +4139,8 @@ void SymbolFileDWARF::DumpClangAST(Stream &s, llvm::StringRef filter) {
}

bool SymbolFileDWARF::GetSeparateDebugInfo(StructuredData::Dictionary &d,
bool errors_only) {
bool errors_only,
bool load_all_debug_info) {
StructuredData::Array separate_debug_info_files;
DWARFDebugInfo &info = DebugInfo();
const size_t num_cus = info.GetNumUnits();
Expand Down Expand Up @@ -4182,7 +4183,7 @@ bool SymbolFileDWARF::GetSeparateDebugInfo(StructuredData::Dictionary &d,

// If we have a DWO symbol file, that means we were able to successfully
// load it.
SymbolFile *dwo_symfile = dwarf_cu->GetDwoSymbolFile();
SymbolFile *dwo_symfile = dwarf_cu->GetDwoSymbolFile(load_all_debug_info);
if (dwo_symfile) {
dwo_data->AddStringItem(
"resolved_dwo_path",
Expand Down
4 changes: 2 additions & 2 deletions lldb/source/Plugins/SymbolFile/DWARF/SymbolFileDWARF.h
Original file line number Diff line number Diff line change
Expand Up @@ -279,8 +279,8 @@ class SymbolFileDWARF : public SymbolFileCommon {
void DumpClangAST(Stream &s, llvm::StringRef filter) override;

/// List separate dwo files.
bool GetSeparateDebugInfo(StructuredData::Dictionary &d,
bool errors_only) override;
bool GetSeparateDebugInfo(StructuredData::Dictionary &d, bool errors_only,
bool load_all_debug_info = true) override;

DWARFContext &GetDWARFContext() { return m_context; }

Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -1278,7 +1278,8 @@ void SymbolFileDWARFDebugMap::DumpClangAST(Stream &s, llvm::StringRef filter) {
}

bool SymbolFileDWARFDebugMap::GetSeparateDebugInfo(
lldb_private::StructuredData::Dictionary &d, bool errors_only) {
lldb_private::StructuredData::Dictionary &d, bool errors_only,
bool load_all_debug_info) {
StructuredData::Array separate_debug_info_files;
const uint32_t cu_count = GetNumCompileUnits();
for (uint32_t cu_idx = 0; cu_idx < cu_count; ++cu_idx) {
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -132,8 +132,8 @@ class SymbolFileDWARFDebugMap : public SymbolFileCommon {
void DumpClangAST(Stream &s, llvm::StringRef filter) override;

/// List separate oso files.
bool GetSeparateDebugInfo(StructuredData::Dictionary &d,
bool errors_only) override;
bool GetSeparateDebugInfo(StructuredData::Dictionary &d, bool errors_only,
bool load_all_debug_info = true) override;

// PluginInterface protocol
llvm::StringRef GetPluginName() override { return GetPluginNameStatic(); }
Expand Down
37 changes: 37 additions & 0 deletions lldb/source/Target/Statistics.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -33,6 +33,37 @@ static void EmplaceSafeString(llvm::json::Object &obj, llvm::StringRef key,
obj.try_emplace(key, llvm::json::fixUTF8(str));
}

static void UpdateDwoFileCounts(SymbolFile *sym_file,
uint32_t &total_dwo_file_count,
uint32_t &total_loaded_dwo_file_count) {
// Count DWO files from this symbol file using GetSeparateDebugInfo
// For DWP files, this increments counts for both total and successfully
// loaded DWO CUs. For non split-dwarf files, these counts should not change
StructuredData::Dictionary separate_debug_info;
if (sym_file->GetSeparateDebugInfo(separate_debug_info,
Copy link
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

I would not recommend calling the GetSeparateDebugInfo API for this task. The GetSeparateDebugInfo API constructs StructuredData::Dictionary JSON-style objects internally, which is not cheap (I have seen profile traces that show constructing StructuredData::Dictionary as a hot path). This can get worse considering that we use statistics dump in lldb session logging by default (meaning we pay the cost every time).

If we want to reuse it, I would suggest refactoring and reusing the necessary underlying implementation without the extra expensive overhead.

Copy link
Contributor Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Makes sense, I'll make a separate API for getting the counts here without constructing StructuredData::Dictionary.

/*errors_only=*/false,
/*load_all_debug_info*/ false)) {
Copy link
Collaborator

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

/*load_all_debug_info=*/false

llvm::StringRef type;
if (separate_debug_info.GetValueForKeyAsString("type", type) &&
type == "dwo") {
StructuredData::Array *files;
if (separate_debug_info.GetValueForKeyAsArray("separate-debug-info-files",
files)) {
files->ForEach([&](StructuredData::Object *obj) {
if (auto dict = obj->GetAsDictionary()) {
total_dwo_file_count++;

bool loaded = false;
if (dict->GetValueForKeyAsBoolean("loaded", loaded) && loaded)
Copy link
Contributor

@jeffreytan81 jeffreytan81 Jun 18, 2025

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

I am not sure this is correct for dwp. In the dwp scenario, there is one dwp file for the entire module (instead of one dwo for each compile unit), so only one SymbolFileDWARFDwo will be created. Then, if you check the code here https://github.com/llvm/llvm-project/blob/main/lldb/source/Plugins/SymbolFile/DWARF/SymbolFileDWARF.cpp#L4185-L4194, it will mark that dwo as "loaded" if SymbolFileDWARFDwo exists, which will happen for all dwo files in the dwp scenario and can be wrong -- if you have .debug_names, lldb won't need to parse all debug info during startup but will parse/load lazily, so only part of the dwp is parsed while the current implementation reports everything as parsed/loaded. This is probably a bug in the original SymbolFileDWARF::GetSeparateDebugInfo implementation though, cc @zhyty

Copy link
Contributor Author

@qxy11 qxy11 Jun 18, 2025

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

This should work for DWP and correctly report the # of parsed CUs (see the unit test added test_dwp_dwo_file_count here).

My understanding is that each CU has its own m_dwo shared_ptr object rather than there being a global DWP reference that's shared. It looks like the DWP file first gets loaded here, the specific DWO unit gets indexed here, and then only after the specific DWO data for the CU is parsed does m_dwo get set. So in the code for GetSeparateDebugInfo it should get the correct per-unit counts.

total_loaded_dwo_file_count++;
}
return true;
});
}
}
}
}

json::Value StatsSuccessFail::ToJSON() const {
return json::Object{{"successes", successes}, {"failures", failures}};
}
Expand Down Expand Up @@ -322,6 +353,8 @@ llvm::json::Value DebuggerStats::ReportStatistics(
uint32_t num_modules_with_incomplete_types = 0;
uint32_t num_stripped_modules = 0;
uint32_t symtab_symbol_count = 0;
uint32_t total_loaded_dwo_file_count = 0;
uint32_t total_dwo_file_count = 0;
for (size_t image_idx = 0; image_idx < num_modules; ++image_idx) {
Module *module = target != nullptr
? target->GetImages().GetModuleAtIndex(image_idx).get()
Expand Down Expand Up @@ -353,6 +386,8 @@ llvm::json::Value DebuggerStats::ReportStatistics(
for (const auto &symbol_module : symbol_modules.Modules())
module_stat.symfile_modules.push_back((intptr_t)symbol_module.get());
}
UpdateDwoFileCounts(sym_file, total_dwo_file_count,
total_loaded_dwo_file_count);
Copy link
Collaborator

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

We should probably break this down and add statistics for each module using keys named "loadedDwoFileCount" and "dwoFileCount" in each module's statistics. And then we still add totals at the end as you have. So maybe fetch into module_stat first and then update the totals:

UpdateDwoFileCounts(sym_file, module_stat.dwo_file_count,
                          module_stat.loaded_dwo_file_count);
total_dwo_file_count += module_stat.dwo_file_count;
total_loaded_dwo_file_count += module_stat.loaded_dwo_file_count;

We will need to add two things to ModuleStats:

uint32_t dwo_file_count = 0;
uint32_t loaded_dwo_file_count = 0;

module_stat.debug_info_index_loaded_from_cache =
sym_file->GetDebugInfoIndexWasLoadedFromCache();
if (module_stat.debug_info_index_loaded_from_cache)
Expand Down Expand Up @@ -427,6 +462,8 @@ llvm::json::Value DebuggerStats::ReportStatistics(
{"totalDebugInfoEnabled", num_debug_info_enabled_modules},
{"totalSymbolTableStripped", num_stripped_modules},
{"totalSymbolTableSymbolCount", symtab_symbol_count},
{"totalLoadedDwoFileCount", total_loaded_dwo_file_count},
{"totalDwoFileCount", total_dwo_file_count},
};

if (include_targets) {
Expand Down
123 changes: 123 additions & 0 deletions lldb/test/API/commands/statistics/basic/TestStats.py
Original file line number Diff line number Diff line change
Expand Up @@ -177,6 +177,8 @@ def test_default_no_run(self):
"totalDebugInfoIndexLoadedFromCache",
"totalDebugInfoIndexSavedToCache",
"totalDebugInfoParseTime",
"totalDwoFileCount",
"totalLoadedDwoFileCount",
]
self.verify_keys(debug_stats, '"debug_stats"', debug_stat_keys, None)
if self.getPlatform() != "windows":
Expand Down Expand Up @@ -287,6 +289,8 @@ def test_default_with_run(self):
"totalDebugInfoIndexLoadedFromCache",
"totalDebugInfoIndexSavedToCache",
"totalDebugInfoParseTime",
"totalDwoFileCount",
"totalLoadedDwoFileCount",
]
self.verify_keys(debug_stats, '"debug_stats"', debug_stat_keys, None)
stats = debug_stats["targets"][0]
Expand Down Expand Up @@ -325,6 +329,8 @@ def test_memory(self):
"totalDebugInfoIndexLoadedFromCache",
"totalDebugInfoIndexSavedToCache",
"totalDebugInfoByteSize",
"totalDwoFileCount",
"totalLoadedDwoFileCount",
]
self.verify_keys(debug_stats, '"debug_stats"', debug_stat_keys, None)

Expand Down Expand Up @@ -377,6 +383,8 @@ def test_modules(self):
"totalDebugInfoIndexLoadedFromCache",
"totalDebugInfoIndexSavedToCache",
"totalDebugInfoByteSize",
"totalDwoFileCount",
"totalLoadedDwoFileCount",
]
self.verify_keys(debug_stats, '"debug_stats"', debug_stat_keys, None)
stats = debug_stats["targets"][0]
Expand Down Expand Up @@ -485,6 +493,8 @@ def test_breakpoints(self):
"totalDebugInfoIndexLoadedFromCache",
"totalDebugInfoIndexSavedToCache",
"totalDebugInfoByteSize",
"totalDwoFileCount",
"totalLoadedDwoFileCount",
]
self.verify_keys(debug_stats, '"debug_stats"', debug_stat_keys, None)
target_stats = debug_stats["targets"][0]
Expand Down Expand Up @@ -512,6 +522,119 @@ def test_breakpoints(self):
self.verify_keys(
breakpoint, 'target_stats["breakpoints"]', bp_keys_exist, None
)
def test_non_split_dwarf_has_no_dwo_files(self):
    """
    Check the DWO counters reported by "statistics dump" for a binary that
    is built with the "debug_names" option only (no split-dwarf): both
    counters must be present and equal to zero.
    """
    build_dict = {
        "CXX_SOURCES": "third.cpp baz.cpp",
        "EXE": self.getBuildArtifact("a.out"),
    }
    self.build(dictionary=build_dict, debug_info=["debug_names"])
    self.addTearDownCleanup(dictionary=build_dict)
    exe_path = self.getBuildArtifact("a.out")
    target = self.createTestTarget(file_path=exe_path)
    stats = self.get_stats()

    counter_keys = ("totalDwoFileCount", "totalLoadedDwoFileCount")
    for key in counter_keys:
        self.assertIn(key, stats)

    # No split-dwarf was requested, so neither counter may be non-zero.
    for key in counter_keys:
        self.assertEqual(stats[key], 0)

def test_debug_names_eager_loads_dwo_files(self):
    """
    Check eager DWO loading when debug_names is absent: the binary is built
    with the "dwo" option only, and "statistics dump" is expected to report
    every DWO file as loaded.

    Expected DWO loading behavior:
    - With debug_names: DebugNamesDWARFIndex allows lazy loading, so DWO
      files are loaded on demand when symbols are actually looked up.
    - Without debug_names: ManualDWARFIndex loads eagerly, so all DWO files
      are loaded up front during the first symbol lookup in order to build
      a manual index.
    """
    build_dict = {
        "CXX_SOURCES": "third.cpp baz.cpp",
        "EXE": self.getBuildArtifact("a.out"),
    }
    self.build(dictionary=build_dict, debug_info=["dwo"])
    self.addTearDownCleanup(dictionary=build_dict)
    exe_path = self.getBuildArtifact("a.out")
    target = self.createTestTarget(file_path=exe_path)
    stats = self.get_stats()

    counter_keys = ("totalDwoFileCount", "totalLoadedDwoFileCount")
    for key in counter_keys:
        self.assertIn(key, stats)

    # Both DWO files (one per source file) must be counted and loaded.
    for key in counter_keys:
        self.assertEqual(stats[key], 2)

def test_split_dwarf_dwo_file_count(self):
    """
    Check the DWO counters reported by "statistics dump" for a binary built
    with separate .dwo files plus debug_names, verifying that the loaded
    count grows as commands force more DWO files to be loaded.
    """
    build_dict = {
        "CXX_SOURCES": "third.cpp baz.cpp",
        "EXE": self.getBuildArtifact("a.out"),
    }
    # "dwo" maps to -gsplit-dwarf (separate .dwo files) and "debug_names"
    # maps to -gpubnames (accelerator tables that allow lazy DWO loading).
    # Expected output: third.dwo (contains main) and baz.dwo (contains the
    # Baz struct/function).
    self.build(dictionary=build_dict, debug_info=["dwo", "debug_names"])
    self.addTearDownCleanup(dictionary=build_dict)
    exe_path = self.getBuildArtifact("a.out")
    target = self.createTestTarget(file_path=exe_path)
    stats = self.get_stats()

    # Before any lookup: 2 DWO files are known, none are loaded.
    self.assertIn("totalLoadedDwoFileCount", stats)
    self.assertIn("totalDwoFileCount", stats)
    self.assertEqual(stats["totalLoadedDwoFileCount"], 0)
    self.assertEqual(stats["totalDwoFileCount"], 2)

    # Each step runs one command and states how many DWO files must be
    # loaded afterwards:
    #   "b main"          -> third.dwo (contains main)
    #   "type lookup Baz" -> baz.dwo (contains struct Baz)
    steps = (("b main", 1), ("type lookup Baz", 2))
    for command, expected_loaded in steps:
        self.runCmd(command)
        stats = self.get_stats()
        self.assertEqual(stats["totalLoadedDwoFileCount"], expected_loaded)
        self.assertEqual(stats["totalDwoFileCount"], 2)

def test_dwp_dwo_file_count(self):
    """
    Check the DWO counters reported by "statistics dump" for a binary built
    with a separate .dwp file plus debug_names.

    The counters are expected to track the compile units loaded from the
    DWP file, each of which corresponds to what was originally a separate
    DWO file.
    """
    build_dict = {
        "CXX_SOURCES": "third.cpp baz.cpp",
        "EXE": self.getBuildArtifact("a.out"),
    }
    self.build(dictionary=build_dict, debug_info=["dwp", "debug_names"])
    self.addTearDownCleanup(dictionary=build_dict)
    exe_path = self.getBuildArtifact("a.out")
    target = self.createTestTarget(file_path=exe_path)

    # Before any lookup: 2 DWO units are known, none are loaded.
    stats = self.get_stats()
    self.assertIn("totalLoadedDwoFileCount", stats)
    self.assertIn("totalDwoFileCount", stats)
    self.assertEqual(stats["totalLoadedDwoFileCount"], 0)
    self.assertEqual(stats["totalDwoFileCount"], 2)

    # A breakpoint on main parses the CU inside a.dwp that corresponds to
    # third.dwo (contains the main function).
    self.runCmd("b main")
    stats = self.get_stats()
    self.assertEqual(stats["totalLoadedDwoFileCount"], 1)
    self.assertEqual(stats["totalDwoFileCount"], 2)

    # A type lookup parses the CU inside a.dwp that corresponds to baz.dwo
    # (contains the struct Baz definition).
    self.runCmd("type lookup Baz")
    stats = self.get_stats()
    self.assertEqual(stats["totalDwoFileCount"], 2)
    self.assertEqual(stats["totalLoadedDwoFileCount"], 2)


@skipUnlessDarwin
@no_debug_info_test
Expand Down
12 changes: 12 additions & 0 deletions lldb/test/API/commands/statistics/basic/baz.cpp
Original file line number Diff line number Diff line change
@@ -0,0 +1,12 @@
// Helper for testing that the lldb command `statistics dump` works in split-dwarf mode.

// Two-field aggregate (an int and a bool) defined in this file; baz() below
// creates one instance of it.
struct Baz {
int x;
bool y;
};

// Creates a local Baz and assigns both of its fields; the values are not read
// afterwards and nothing is returned.
void baz() {
Baz b;
b.x = 1;
b.y = true;
}
7 changes: 7 additions & 0 deletions lldb/test/API/commands/statistics/basic/third.cpp
Original file line number Diff line number Diff line change
@@ -0,0 +1,7 @@
// Test that the lldb command `statistics dump` works.

// Forward declaration only; no definition of baz() appears in this file.
void baz();
// Program entry point: calls baz() once and then returns 0.
int main(void) {
baz();
return 0;
}