Skip to content

Commit 1685a6a

Browse files
authored
[BOLT] Fix incorrect CU-indices in gdb-index (#151927)
After we sort the CUVector, we have to update the CU-indices in the address map and the constant pool.
1 parent 39ed57c commit 1685a6a

6 files changed

+106
-18
lines changed

bolt/lib/Core/GDBIndex.cpp

Lines changed: 95 additions & 7 deletions
Original file line numberDiff line numberDiff line change
@@ -77,7 +77,8 @@ void GDBIndex::updateGdbIndexSection(
7777
exit(1);
7878
}
7979
DenseSet<uint64_t> OriginalOffsets;
80-
for (unsigned Index = 0, Units = BC.DwCtx->getNumCompileUnits();
80+
for (unsigned Index = 0, PresentUnitsIndex = 0,
81+
Units = BC.DwCtx->getNumCompileUnits();
8182
Index < Units; ++Index) {
8283
const DWARFUnit *CU = BC.DwCtx->getUnitAtIndex(Index);
8384
if (SkipTypeUnits && CU->isTypeUnit())
@@ -90,7 +91,7 @@ void GDBIndex::updateGdbIndexSection(
9091
}
9192

9293
OriginalOffsets.insert(Offset);
93-
OffsetToIndexMap[Offset] = Index;
94+
OffsetToIndexMap[Offset] = PresentUnitsIndex++;
9495
}
9596

9697
// Ignore old address table.
@@ -125,16 +126,52 @@ void GDBIndex::updateGdbIndexSection(
125126

126127
using MapEntry = std::pair<uint32_t, CUInfo>;
127128
std::vector<MapEntry> CUVector(CUMap.begin(), CUMap.end());
129+
// Remove the CUs we won't emit anyway.
130+
CUVector.erase(std::remove_if(CUVector.begin(), CUVector.end(),
131+
[&OriginalOffsets](const MapEntry &It) {
132+
// Skipping TU for DWARF5 when they are not
133+
// included in CU list.
134+
return OriginalOffsets.count(It.first) == 0;
135+
}),
136+
CUVector.end());
128137
// Need to sort since we write out all of TUs in .debug_info before CUs.
129138
std::sort(CUVector.begin(), CUVector.end(),
130139
[](const MapEntry &E1, const MapEntry &E2) -> bool {
131140
return E1.second.Offset < E2.second.Offset;
132141
});
142+
// Create the original CU index -> updated CU index mapping,
143+
// as the sort above could've changed the order and we have to update
144+
// indices correspondingly in address map and constant pool.
145+
std::unordered_map<uint32_t, uint32_t> OriginalCUIndexToUpdatedCUIndexMap;
146+
OriginalCUIndexToUpdatedCUIndexMap.reserve(CUVector.size());
147+
for (uint32_t I = 0; I < CUVector.size(); ++I) {
148+
OriginalCUIndexToUpdatedCUIndexMap[OffsetToIndexMap.at(CUVector[I].first)] =
149+
I;
150+
}
151+
const auto RemapCUIndex = [&OriginalCUIndexToUpdatedCUIndexMap,
152+
CUVectorSize = CUVector.size(),
153+
TUVectorSize = getGDBIndexTUEntryVector().size()](
154+
uint32_t OriginalIndex) {
155+
if (OriginalIndex >= CUVectorSize) {
156+
if (OriginalIndex >= CUVectorSize + TUVectorSize) {
157+
errs() << "BOLT-ERROR: .gdb_index unknown CU index\n";
158+
exit(1);
159+
}
160+
// The index is into TU CU List, which we don't reorder, so return as is.
161+
return OriginalIndex;
162+
}
163+
164+
const auto It = OriginalCUIndexToUpdatedCUIndexMap.find(OriginalIndex);
165+
if (It == OriginalCUIndexToUpdatedCUIndexMap.end()) {
166+
errs() << "BOLT-ERROR: .gdb_index unknown CU index\n";
167+
exit(1);
168+
}
169+
170+
return It->second;
171+
};
172+
133173
// Writing out CU List <Offset, Size>
134174
for (auto &CUInfo : CUVector) {
135-
// Skipping TU for DWARF5 when they are not included in CU list.
136-
if (!OriginalOffsets.count(CUInfo.first))
137-
continue;
138175
write64le(Buffer, CUInfo.second.Offset);
139176
// Length encoded in CU doesn't contain first 4 bytes that encode length.
140177
write64le(Buffer + 8, CUInfo.second.Length + 4);
@@ -160,12 +197,13 @@ void GDBIndex::updateGdbIndexSection(
160197
// Generate new address table.
161198
for (const std::pair<const uint64_t, DebugAddressRangesVector> &CURangesPair :
162199
ARangesSectionWriter.getCUAddressRanges()) {
163-
const uint32_t CUIndex = OffsetToIndexMap[CURangesPair.first];
200+
const uint32_t OriginalCUIndex = OffsetToIndexMap[CURangesPair.first];
201+
const uint32_t UpdatedCUIndex = RemapCUIndex(OriginalCUIndex);
164202
const DebugAddressRangesVector &Ranges = CURangesPair.second;
165203
for (const DebugAddressRange &Range : Ranges) {
166204
write64le(Buffer, Range.LowPC);
167205
write64le(Buffer + 8, Range.HighPC);
168-
write32le(Buffer + 16, CUIndex);
206+
write32le(Buffer + 16, UpdatedCUIndex);
169207
Buffer += 20;
170208
}
171209
}
@@ -178,6 +216,56 @@ void GDBIndex::updateGdbIndexSection(
178216
// Copy over the rest of the original data.
179217
memcpy(Buffer, Data, TrailingSize);
180218

219+
// Fixup CU-indices in constant pool.
220+
const char *const OriginalConstantPoolData =
221+
GdbIndexContents.data() + ConstantPoolOffset;
222+
uint8_t *const UpdatedConstantPoolData =
223+
NewGdbIndexContents + ConstantPoolOffset + Delta;
224+
225+
const char *OriginalSymbolTableData =
226+
GdbIndexContents.data() + SymbolTableOffset;
227+
std::set<uint32_t> CUVectorOffsets;
228+
// Parse the symbol map and extract constant pool CU offsets from it.
229+
while (OriginalSymbolTableData < OriginalConstantPoolData) {
230+
const uint32_t NameOffset = read32le(OriginalSymbolTableData);
231+
const uint32_t CUVectorOffset = read32le(OriginalSymbolTableData + 4);
232+
OriginalSymbolTableData += 8;
233+
234+
// Iff both are zero, then the slot is considered empty in the hash-map.
235+
if (NameOffset || CUVectorOffset) {
236+
CUVectorOffsets.insert(CUVectorOffset);
237+
}
238+
}
239+
240+
// Update the CU-indices in the constant pool.
241+
for (const auto CUVectorOffset : CUVectorOffsets) {
242+
const char *CurrentOriginalConstantPoolData =
243+
OriginalConstantPoolData + CUVectorOffset;
244+
uint8_t *CurrentUpdatedConstantPoolData =
245+
UpdatedConstantPoolData + CUVectorOffset;
246+
247+
const uint32_t Num = read32le(CurrentOriginalConstantPoolData);
248+
CurrentOriginalConstantPoolData += 4;
249+
CurrentUpdatedConstantPoolData += 4;
250+
251+
for (uint32_t J = 0; J < Num; ++J) {
252+
const uint32_t OriginalCUIndexAndAttributes =
253+
read32le(CurrentOriginalConstantPoolData);
254+
CurrentOriginalConstantPoolData += 4;
255+
256+
// We only care for the index, which is the lowest 24 bits, other bits are
257+
// left as is.
258+
const uint32_t OriginalCUIndex =
259+
OriginalCUIndexAndAttributes & ((1 << 24) - 1);
260+
const uint32_t Attributes = OriginalCUIndexAndAttributes >> 24;
261+
const uint32_t UpdatedCUIndexAndAttributes =
262+
RemapCUIndex(OriginalCUIndex) | (Attributes << 24);
263+
264+
write32le(CurrentUpdatedConstantPoolData, UpdatedCUIndexAndAttributes);
265+
CurrentUpdatedConstantPoolData += 4;
266+
}
267+
}
268+
181269
// Register the new section.
182270
BC.registerOrUpdateNoteSection(".gdb_index", NewGdbIndexContents,
183271
NewGdbIndexSize);

bolt/test/X86/dwarf5-dwarf4-gdb-index-types-gdb-generated-gdb11.test

Lines changed: 2 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -18,9 +18,9 @@
1818
# POSTCHECK-NEXT: 1: offset = 0x00000000, type_offset = 0x0000001e, type_signature = 0x00f6cca4e3a15118
1919
# POSTCHECK: Address area offset = 0x68, has 2 entries
2020
# POSTCHECK-NEXT: Low/High address = [0x[[#%.4x,ADDR:]],
21-
# POSTCHECK-SAME: 0x[[#ADDR + 0xf]]) (Size: 0xf), CU id = 1
21+
# POSTCHECK-SAME: 0x[[#ADDR + 0xf]]) (Size: 0xf), CU id = 0
2222
# POSTCHECK-NEXT: Low/High address = [0x[[#%.4x,ADDR1:]],
23-
# POSTCHECK-SAME: 0x[[#ADDR1 + 0xd]]) (Size: 0xd), CU id = 2
23+
# POSTCHECK-SAME: 0x[[#ADDR1 + 0xd]]) (Size: 0xd), CU id = 1
2424
# POSTCHECK: Symbol table offset = 0x90, size = 1024, filled slots
2525
# POSTCHECK-NEXT: 2: Name offset = 0x20, CU vector offset = 0x0
2626
# POSTCHECK-NEXT: String name: S, CU vector index: 0

bolt/test/X86/dwarf5-dwarf4-gdb-index-types-lld-generated.test

Lines changed: 2 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -15,9 +15,9 @@
1515
# POSTCHECK: Types CU list offset = 0x38, has 0 entries
1616
# POSTCHECK: Address area offset = 0x38, has 2 entries
1717
# POSTCHECK-NEXT: Low/High address = [0x[[#%.4x,ADDR:]],
18-
# POSTCHECK-SAME: 0x[[#ADDR + 0xf]]) (Size: 0xf), CU id = 1
18+
# POSTCHECK-SAME: 0x[[#ADDR + 0xf]]) (Size: 0xf), CU id = 0
1919
# POSTCHECK-NEXT: Low/High address = [0x[[#%.4x,ADDR1:]],
20-
# POSTCHECK-SAME: 0x[[#ADDR1 + 0xd]]) (Size: 0xd), CU id = 2
20+
# POSTCHECK-SAME: 0x[[#ADDR1 + 0xd]]) (Size: 0xd), CU id = 1
2121
# POSTCHECK: Symbol table offset = 0x60, size = 1024, filled slots
2222
# POSTCHECK-NEXT: 2: Name offset = 0x38, CU vector offset = 0x0
2323
# POSTCHECK-NEXT: String name: S, CU vector index: 0

bolt/test/X86/dwarf5-gdb-index-types-gdb-generated-gdb11.test

Lines changed: 2 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -18,9 +18,9 @@
1818
# POSTCHECK-NEXT: 1: offset = 0x00000040, type_offset = 0x00000023, type_signature = 0x00f6cca4e3a15118
1919
# POSTCHECK: Address area offset = 0x68, has 2 entries
2020
# POSTCHECK-NEXT: Low/High address = [0x[[#%.4x,ADDR:]],
21-
# POSTCHECK-SAME: 0x[[#ADDR + 0xf]]) (Size: 0xf), CU id = 1
21+
# POSTCHECK-SAME: 0x[[#ADDR + 0xf]]) (Size: 0xf), CU id = 0
2222
# POSTCHECK-NEXT: Low/High address = [0x[[#%.4x,ADDR1:]],
23-
# POSTCHECK-SAME: 0x[[#ADDR1 + 0xd]]) (Size: 0xd), CU id = 3
23+
# POSTCHECK-SAME: 0x[[#ADDR1 + 0xd]]) (Size: 0xd), CU id = 1
2424
# POSTCHECK: Symbol table offset = 0x90, size = 1024, filled slots
2525
# POSTCHECK-NEXT: 2: Name offset = 0x28, CU vector offset = 0x0
2626
# POSTCHECK-NEXT: String name: S, CU vector index: 0

bolt/test/X86/dwarf5-gdb-index-types-gdb-generated-gdb9.test

Lines changed: 3 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -20,7 +20,7 @@
2020
# POSTCHECK-NEXT: 1: offset = 0x00000040, type_offset = 0x00000023, type_signature = 0x00f6cca4e3a15118
2121
# POSTCHECK: Address area offset = 0x88, has 2 entries
2222
# POSTCHECK-NEXT: Low/High address = [0x[[#%.4x,ADDR:]],
23-
# POSTCHECK-SAME: 0x[[#ADDR + 0xf]]) (Size: 0xf), CU id = 1
23+
# POSTCHECK-SAME: 0x[[#ADDR + 0xf]]) (Size: 0xf), CU id = 2
2424
# POSTCHECK-NEXT: Low/High address = [0x[[#%.4x,ADDR1:]],
2525
# POSTCHECK-SAME: 0x[[#ADDR1 + 0xd]]) (Size: 0xd), CU id = 3
2626
# POSTCHECK: Symbol table offset = 0xb0, size = 1024, filled slots
@@ -37,7 +37,7 @@
3737
# POSTCHECK-NEXT: 754: Name offset = 0x43, CU vector offset = 0x0
3838
# POSTCHECK-NEXT: String name: int, CU vector index: 0
3939
# POSTCHECK: Constant pool offset = 0x20b0, has 5 CU vectors
40-
# POSTCHECK-NEXT: 0(0x0): 0x90000001
40+
# POSTCHECK-NEXT: 0(0x0): 0x90000002
4141
# POSTCHECK-NEXT: 1(0x8): 0x90000003
42-
# POSTCHECK-NEXT: 2(0x10): 0x30000001
42+
# POSTCHECK-NEXT: 2(0x10): 0x30000002
4343
# POSTCHECK-NEXT: 3(0x18): 0x30000003

bolt/test/X86/dwarf5-gdb-index-types-lld-generated.test

Lines changed: 2 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -15,9 +15,9 @@
1515
# POSTCHECK: Types CU list offset = 0x38, has 0 entries
1616
# POSTCHECK: Address area offset = 0x38, has 2 entries
1717
# POSTCHECK-NEXT: Low/High address = [0x[[#%.4x,ADDR:]],
18-
# POSTCHECK-SAME: 0x[[#ADDR + 0xf]]) (Size: 0xf), CU id = 1
18+
# POSTCHECK-SAME: 0x[[#ADDR + 0xf]]) (Size: 0xf), CU id = 0
1919
# POSTCHECK-NEXT: Low/High address = [0x[[#%.4x,ADDR1:]],
20-
# POSTCHECK-SAME: 0x[[#ADDR1 + 0xd]]) (Size: 0xd), CU id = 3
20+
# POSTCHECK-SAME: 0x[[#ADDR1 + 0xd]]) (Size: 0xd), CU id = 1
2121
# POSTCHECK: Symbol table offset = 0x60, size = 1024, filled slots
2222
# POSTCHECK-NEXT: 2: Name offset = 0x38, CU vector offset = 0x0
2323
# POSTCHECK-NEXT: String name: S, CU vector index: 0

0 commit comments

Comments
 (0)