Skip to content

Commit 99afa70

Browse files
Nerixyz authored and Debadri Basak committed
[LLDB][NativePDB] Estimate symbol sizes (llvm#165727)
In llvm#165604, a test was skipped on Windows because the native PDB plugin didn't set sizes on symbols. While the test isn't compiled with debug info, it's linked with `-gdwarf`, causing a PDB to be created on Windows. This PDB will only contain the public symbols (written by the linker) and section information. The symbols themselves don't have a size; however, the DIA SDK sets a size for them. It seems that, for these data symbols, the size reported by DIA is the distance to the next symbol (or to the end of the section). This PR implements that naive approach for the native plugin. The main difference is in function/code symbols. There, DIA searches for a corresponding `S_GPROC32` record, which has a "code size" that is sometimes slightly smaller than the distance to the next symbol.
1 parent 6afb627 commit 99afa70

File tree

3 files changed

+69
-35
lines changed

3 files changed

+69
-35
lines changed

lldb/source/Plugins/SymbolFile/NativePDB/SymbolFileNativePDB.cpp

Lines changed: 51 additions & 16 deletions
Original file line numberDiff line numberDiff line change
@@ -1130,16 +1130,47 @@ void SymbolFileNativePDB::AddSymbols(Symtab &symtab) {
11301130
if (!section_list)
11311131
return;
11321132

1133-
for (auto pid : m_index->publics().getPublicsTable()) {
1133+
PublicSym32 last_sym;
1134+
size_t last_sym_idx = 0;
1135+
lldb::SectionSP section_sp;
1136+
1137+
// To estimate the size of a symbol, we use the difference to the next symbol.
1138+
// If there's no next symbol or the section/segment changed, the symbol will
1139+
// take the remaining space. The estimate can be too high in case there's
1140+
// padding between symbols. This is similar to the algorithm used by the DIA
1141+
// SDK.
1142+
auto finish_last_symbol = [&](const PublicSym32 *next) {
1143+
if (!section_sp)
1144+
return;
1145+
Symbol *last = symtab.SymbolAtIndex(last_sym_idx);
1146+
if (!last)
1147+
return;
1148+
1149+
if (next && last_sym.Segment == next->Segment) {
1150+
assert(last_sym.Offset <= next->Offset);
1151+
last->SetByteSize(next->Offset - last_sym.Offset);
1152+
} else {
1153+
// the last symbol was the last in its section
1154+
assert(section_sp->GetByteSize() >= last_sym.Offset);
1155+
assert(!next || next->Segment > last_sym.Segment);
1156+
last->SetByteSize(section_sp->GetByteSize() - last_sym.Offset);
1157+
}
1158+
};
1159+
1160+
// The address map is sorted by the address of a symbol.
1161+
for (auto pid : m_index->publics().getAddressMap()) {
11341162
PdbGlobalSymId global{pid, true};
11351163
CVSymbol sym = m_index->ReadSymbolRecord(global);
11361164
auto kind = sym.kind();
11371165
if (kind != S_PUB32)
11381166
continue;
11391167
PublicSym32 pub =
11401168
llvm::cantFail(SymbolDeserializer::deserializeAs<PublicSym32>(sym));
1169+
finish_last_symbol(&pub);
1170+
1171+
if (!section_sp || last_sym.Segment != pub.Segment)
1172+
section_sp = section_list->FindSectionByID(pub.Segment);
11411173

1142-
auto section_sp = section_list->FindSectionByID(pub.Segment);
11431174
if (!section_sp)
11441175
continue;
11451176

@@ -1148,20 +1179,24 @@ void SymbolFileNativePDB::AddSymbols(Symtab &symtab) {
11481179
(pub.Flags & PublicSymFlags::Code) != PublicSymFlags::None)
11491180
type = eSymbolTypeCode;
11501181

1151-
symtab.AddSymbol(Symbol(/*symID=*/pid,
1152-
/*name=*/pub.Name,
1153-
/*type=*/type,
1154-
/*external=*/true,
1155-
/*is_debug=*/true,
1156-
/*is_trampoline=*/false,
1157-
/*is_artificial=*/false,
1158-
/*section_sp=*/section_sp,
1159-
/*value=*/pub.Offset,
1160-
/*size=*/0,
1161-
/*size_is_valid=*/false,
1162-
/*contains_linker_annotations=*/false,
1163-
/*flags=*/0));
1164-
}
1182+
last_sym_idx =
1183+
symtab.AddSymbol(Symbol(/*symID=*/pid,
1184+
/*name=*/pub.Name,
1185+
/*type=*/type,
1186+
/*external=*/true,
1187+
/*is_debug=*/true,
1188+
/*is_trampoline=*/false,
1189+
/*is_artificial=*/false,
1190+
/*section_sp=*/section_sp,
1191+
/*value=*/pub.Offset,
1192+
/*size=*/0,
1193+
/*size_is_valid=*/false,
1194+
/*contains_linker_annotations=*/false,
1195+
/*flags=*/0));
1196+
last_sym = pub;
1197+
}
1198+
1199+
finish_last_symbol(nullptr);
11651200
}
11661201

11671202
size_t SymbolFileNativePDB::ParseFunctions(CompileUnit &comp_unit) {

lldb/test/API/functionalities/multiple-slides/TestMultipleSlides.py

Lines changed: 3 additions & 4 deletions
Original file line numberDiff line numberDiff line change
@@ -12,10 +12,6 @@
1212
class MultipleSlidesTestCase(TestBase):
1313
NO_DEBUG_INFO_TESTCASE = True
1414

15-
# The intermediate object main.o is compiled without debug info, but
16-
# a.out is linked with `-gdwarf` on Windows. This creates a PDB.
17-
# However, in the native PDB plugin, the symbols don't have a size.
18-
@expectedFailureWindows
1915
def test_mulitple_slides(self):
2016
"""Test that a binary can be slid multiple times correctly."""
2117
self.build()
@@ -33,10 +29,13 @@ def test_mulitple_slides(self):
3329
first_sym.GetEndAddress().GetOffset()
3430
- first_sym.GetStartAddress().GetOffset()
3531
)
32+
int_size = target.FindFirstType("int").GetByteSize()
33+
self.assertGreaterEqual(first_size, 2048 * int_size)
3634
second_size = (
3735
second_sym.GetEndAddress().GetOffset()
3836
- second_sym.GetStartAddress().GetOffset()
3937
)
38+
self.assertGreaterEqual(second_size, 2048 * int_size)
4039

4140
# View the first element of `first` and `second` while
4241
# they have no load address set.

lldb/test/Shell/SymbolFile/NativePDB/symtab.cpp

Lines changed: 15 additions & 15 deletions
Original file line numberDiff line numberDiff line change
@@ -42,18 +42,18 @@ int main(int argc, char **argv) {
4242
return ns::a_function() + b.b_func();
4343
}
4444

45-
// CHECK-DAG: Code {{.*}} main
46-
// CHECK-DAG: Code {{.*}} ?b_func@?$B@F@ns@@QEBAHXZ
47-
// CHECK-DAG: Code {{.*}} ?something@A@@QEAAXXZ
48-
// CHECK-DAG: Code {{.*}} ??_GDyn@ns@@UEAAPEAXI@Z
49-
// CHECK-DAG: Code {{.*}} ??2@YAPEAX_K@Z
50-
// CHECK-DAG: Code {{.*}} ??3@YAXPEAX_K@Z
51-
// CHECK-DAG: Code {{.*}} ?static_fn@C@?$B@H@ns@@SAHXZ
52-
// CHECK-DAG: Code {{.*}} ?a_function@ns@@YAHXZ
53-
// CHECK-DAG: Code {{.*}} ?static_fn@C@?$B@_N@ns@@SAHXZ
54-
// CHECK-DAG: Code {{.*}} ??1Dyn@ns@@UEAA@XZ
55-
// CHECK-DAG: Code {{.*}} ??0Dyn@ns@@QEAA@XZ
56-
// CHECK-DAG: Data {{.*}} ?global_int@@3HA
57-
// CHECK-DAG: Data {{.*}} ??_7Dyn@ns@@6B@
58-
// CHECK-DAG: Data {{.*}} ?global_a@@3UA@@A
59-
// CHECK-DAG: Data {{.*}} ?global_c@@3UC@?$B@_J@ns@@A
45+
// CHECK-DAG: Code 0x{{[0-9a-f]+}} 0x{{0*[1-9a-f][0-9a-f]*}} 0x00000000 main
46+
// CHECK-DAG: Code 0x{{[0-9a-f]+}} 0x{{0*[1-9a-f][0-9a-f]*}} 0x00000000 ?b_func@?$B@F@ns@@QEBAHXZ
47+
// CHECK-DAG: Code 0x{{[0-9a-f]+}} 0x{{0*[1-9a-f][0-9a-f]*}} 0x00000000 ?something@A@@QEAAXXZ
48+
// CHECK-DAG: Code 0x{{[0-9a-f]+}} 0x{{0*[1-9a-f][0-9a-f]*}} 0x00000000 ??_GDyn@ns@@UEAAPEAXI@Z
49+
// CHECK-DAG: Code 0x{{[0-9a-f]+}} 0x{{0*[1-9a-f][0-9a-f]*}} 0x00000000 ??2@YAPEAX_K@Z
50+
// CHECK-DAG: Code 0x{{[0-9a-f]+}} 0x{{0*[1-9a-f][0-9a-f]*}} 0x00000000 ??3@YAXPEAX_K@Z
51+
// CHECK-DAG: Code 0x{{[0-9a-f]+}} 0x{{0*[1-9a-f][0-9a-f]*}} 0x00000000 ?static_fn@C@?$B@H@ns@@SAHXZ
52+
// CHECK-DAG: Code 0x{{[0-9a-f]+}} 0x{{0*[1-9a-f][0-9a-f]*}} 0x00000000 ?a_function@ns@@YAHXZ
53+
// CHECK-DAG: Code 0x{{[0-9a-f]+}} 0x{{0*[1-9a-f][0-9a-f]*}} 0x00000000 ?static_fn@C@?$B@_N@ns@@SAHXZ
54+
// CHECK-DAG: Code 0x{{[0-9a-f]+}} 0x{{0*[1-9a-f][0-9a-f]*}} 0x00000000 ??1Dyn@ns@@UEAA@XZ
55+
// CHECK-DAG: Code 0x{{[0-9a-f]+}} 0x{{0*[1-9a-f][0-9a-f]*}} 0x00000000 ??0Dyn@ns@@QEAA@XZ
56+
// CHECK-DAG: Data 0x{{[0-9a-f]+}} 0x{{0*[1-9a-f][0-9a-f]*}} 0x00000000 ?global_int@@3HA
57+
// CHECK-DAG: Data 0x{{[0-9a-f]+}} 0x{{0*[1-9a-f][0-9a-f]*}} 0x00000000 ??_7Dyn@ns@@6B@
58+
// CHECK-DAG: Data 0x{{[0-9a-f]+}} 0x{{0*[1-9a-f][0-9a-f]*}} 0x00000000 ?global_a@@3UA@@A
59+
// CHECK-DAG: Data 0x{{[0-9a-f]+}} 0x{{0*[1-9a-f][0-9a-f]*}} 0x00000000 ?global_c@@3UC@?$B@_J@ns@@A

0 commit comments

Comments
 (0)