-
Notifications
You must be signed in to change notification settings - Fork 15.2k
[LLDB] Omit loading local symbols in LLDB symbol table #154809
New issue
Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.
By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.
Already on GitHub? Sign in to your account
Conversation
|
@llvm/pr-subscribers-lldb Author: None (barsolo2000) ChangesImproving symbolication by excluding local symbols that are typically not useful for debugging or symbol lookups. This aligns with the discussion that local symbols, especially those with STB_LOCAL binding and STT_NOTYPE type (including .L-prefixed symbols), often interfere with symbol resolution and can be safely omitted. Full diff: https://github.com/llvm/llvm-project/pull/154809.diff 2 Files Affected:
diff --git a/lldb/source/Plugins/ObjectFile/ELF/ObjectFileELF.cpp b/lldb/source/Plugins/ObjectFile/ELF/ObjectFileELF.cpp
index f69358de6a288..cfa446d185b14 100644
--- a/lldb/source/Plugins/ObjectFile/ELF/ObjectFileELF.cpp
+++ b/lldb/source/Plugins/ObjectFile/ELF/ObjectFileELF.cpp
@@ -2037,6 +2037,18 @@ static char FindArmAarch64MappingSymbol(const char *symbol_name) {
return '\0';
}
+static char FindRISCVMappingSymbol(const char *symbol_name) {
+ if (!symbol_name)
+ return '\0';
+
+ if (symbol_name[0] == '$' &&
+ (symbol_name[1] == 'd' || symbol_name[1] == 'x') &&
+ symbol_name[2] == '\0') {
+ return symbol_name[1];
+ }
+ return '\0';
+}
+
#define STO_MIPS_ISA (3 << 6)
#define STO_MICROMIPS (2 << 6)
#define IS_MICROMIPS(ST_OTHER) (((ST_OTHER)&STO_MIPS_ISA) == STO_MICROMIPS)
@@ -2102,6 +2114,8 @@ ObjectFileELF::ParseSymbols(Symtab *symtab, user_id_t start_id,
if (!symbol_name)
symbol_name = "";
+ if (symbol_name[0] == '.' && symbol_name[1] == 'L')
+ continue;
// No need to add non-section symbols that have no names
if (symbol.getType() != STT_SECTION &&
(symbol_name == nullptr || symbol_name[0] == '\0'))
@@ -2190,9 +2204,9 @@ ObjectFileELF::ParseSymbols(Symtab *symtab, user_id_t start_id,
int64_t symbol_value_offset = 0;
uint32_t additional_flags = 0;
-
+ llvm::Triple::ArchType arch_machine = arch.GetMachine();
if (arch.IsValid()) {
- if (arch.GetMachine() == llvm::Triple::arm) {
+ if (arch_machine == llvm::Triple::arm) {
if (symbol.getBinding() == STB_LOCAL) {
char mapping_symbol = FindArmAarch64MappingSymbol(symbol_name);
if (symbol_type == eSymbolTypeCode) {
@@ -2217,7 +2231,7 @@ ObjectFileELF::ParseSymbols(Symtab *symtab, user_id_t start_id,
if (mapping_symbol)
continue;
}
- } else if (arch.GetMachine() == llvm::Triple::aarch64) {
+ } else if (arch_machine == llvm::Triple::aarch64) {
if (symbol.getBinding() == STB_LOCAL) {
char mapping_symbol = FindArmAarch64MappingSymbol(symbol_name);
if (symbol_type == eSymbolTypeCode) {
@@ -2235,9 +2249,30 @@ ObjectFileELF::ParseSymbols(Symtab *symtab, user_id_t start_id,
if (mapping_symbol)
continue;
}
+ } else if (arch_machine == llvm::Triple::riscv32 ||
+ arch_machine == llvm::Triple::riscv64 ||
+ arch_machine == llvm::Triple::riscv32be ||
+ arch_machine == llvm::Triple::riscv64be) {
+ if (symbol.getBinding() == STB_LOCAL) {
+ char mapping_symbol = FindRISCVMappingSymbol(symbol_name);
+ if (symbol_type == eSymbolTypeCode) {
+ switch (mapping_symbol) {
+ case 'x':
+ // $x - marks a RISCV instruction sequence
+ address_class_map[symbol.st_value] = AddressClass::eCode;
+ break;
+ case 'd':
+ // $d - marks a RISCV data item sequence
+ address_class_map[symbol.st_value] = AddressClass::eData;
+ break;
+ }
+ }
+ if (mapping_symbol)
+ continue;
+ }
}
- if (arch.GetMachine() == llvm::Triple::arm) {
+ if (arch_machine == llvm::Triple::arm) {
if (symbol_type == eSymbolTypeCode) {
if (symbol.st_value & 1) {
// Subtracting 1 from the address effectively unsets the low order
diff --git a/lldb/unittests/ObjectFile/ELF/TestObjectFileELF.cpp b/lldb/unittests/ObjectFile/ELF/TestObjectFileELF.cpp
index 80abc5b80f84d..9af0aeb9bab50 100644
--- a/lldb/unittests/ObjectFile/ELF/TestObjectFileELF.cpp
+++ b/lldb/unittests/ObjectFile/ELF/TestObjectFileELF.cpp
@@ -308,3 +308,78 @@ TEST_F(ObjectFileELFTest, GetSymtab_NoSymEntryPointArmAddressClass) {
auto entry_point_addr = module_sp->GetObjectFile()->GetEntryPointAddress();
ASSERT_EQ(entry_point_addr.GetAddressClass(), AddressClass::eCode);
}
+
+TEST_F(ObjectFileELFTest, SkipsLocalMappingAndDotLSymbols) {
+ auto ExpectedFile = TestFile::fromYaml(R"(
+--- !ELF
+ FileHeader:
+ Class: ELFCLASS64
+ Data: ELFDATA2LSB
+ Type: ET_EXEC
+ Machine: EM_RISCV
+ Flags: [ EF_RISCV_RVC, EF_RISCV_FLOAT_ABI_SINGLE ]
+ Entry: 0xC0A1B010
+ Sections:
+ - Name: .text
+ Type: SHT_PROGBITS
+ Flags: [ SHF_ALLOC, SHF_EXECINSTR ]
+ Address: 0x0000000000400180
+ AddressAlign: 0x0000000000000010
+ Content: 554889E5
+ - Name: .data
+ Type: SHT_PROGBITS
+ Flags: [ SHF_WRITE, SHF_ALLOC ]
+ Address: 0x0000000000601000
+ AddressAlign: 0x0000000000000004
+ Content: 2F000000
+ Symbols:
+ - Name: $d
+ Type: STT_FUNC
+ Section: .text
+ Value: 0x0000000000400180
+ Size: 0x10
+ Binding: STB_LOCAL
+ - Name: $x
+ Type: STT_FUNC
+ Section: .text
+ Value: 0xC0A1B010
+ Size: 0x10
+ Binding: STB_LOCAL
+ - Name: .Lfoo
+ Type: STT_OBJECT
+ Section: .data
+ Value: 0x0000000000601000
+ Size: 0x4
+ Binding: STB_LOCAL
+ - Name: global_func
+ Type: STT_FUNC
+ Section: .text
+ Value: 0x00000000004001A0
+ Size: 0x10
+ Binding: STB_GLOBAL
+ - Name: global_obj
+ Type: STT_OBJECT
+ Section: .data
+ Value: 0x0000000000601004
+ Size: 0x4
+ Binding: STB_GLOBAL
+...
+ )");
+ ASSERT_THAT_EXPECTED(ExpectedFile, llvm::Succeeded());
+ auto module_sp = std::make_shared<Module>(ExpectedFile->moduleSpec());
+ auto *symtab = module_sp->GetSymtab();
+ ASSERT_NE(nullptr, symtab);
+ EXPECT_EQ(nullptr, module_sp->FindFirstSymbolWithNameAndType(
+ ConstString("$d"), eSymbolTypeAny));
+ EXPECT_EQ(nullptr, module_sp->FindFirstSymbolWithNameAndType(
+ ConstString("$x"), eSymbolTypeAny));
+ EXPECT_EQ(nullptr, module_sp->FindFirstSymbolWithNameAndType(
+ ConstString(".Lfoo"), eSymbolTypeAny));
+ // assert that other symbols are present
+ const Symbol *global_func = module_sp->FindFirstSymbolWithNameAndType(
+ ConstString("global_func"), eSymbolTypeAny);
+ ASSERT_NE(nullptr, global_func);
+ const Symbol *global_obj = module_sp->FindFirstSymbolWithNameAndType(
+ ConstString("global_obj"), eSymbolTypeAny);
+ ASSERT_NE(nullptr, global_obj);
+}
|
| Content: 2F000000 | ||
| Symbols: | ||
| - Name: $d | ||
| Type: STT_FUNC |
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
$d and $x are not STT_FUNC type. They are unknown type, i.e. Type is empty.
|
✅ With the latest revision this PR passed the C/C++ code formatter. |
02e2f36 to
315266d
Compare
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
I'm not sure the FindRISCVMappingSymbol is making the code cleaner here since we have to switch on the result anyway.
I think it would look cleaner to just check the symbol directly.
if (strcmp(symbol_name, "$d") == 0) {
address_class_map[symbol.st_value] = AddressClass::eCode;
continue;
}
if (strcmp(symbol_name, "$x") == 0) {
address_class_map[symbol.st_value] = AddressClass::eData;
continue;
}
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
We discussed offline and decided to keep it the way I did before to follow same style as existing code.
8b01f93 to
b97b006
Compare
dmpots
left a comment
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
LGTM
|
Looks good to me as well ! |
https://discourse.llvm.org/t/rfc-should-we-omit-local-symbols-in-eekciihgtfvflvnbieicunjlrtnufhuelf-files-from-the-lldb-symbol-table/87384
Improving symbolication by excluding local symbols that are typically not useful for debugging or symbol lookups. This aligns with the discussion that local symbols, especially those with STB_LOCAL binding and STT_NOTYPE type (including .L-prefixed symbols), often interfere with symbol resolution and can be safely omitted.