Skip to content

Commit 56edf06

Browse files
avl-llvmtru
authored andcommitted
[dsymutil] dsymutil produces broken lines info (probably) with LTO on mac
This patch fixes #60307 issue. The 8bb4451 introduces the possibility to unite overlapped or adjacent address ranges to keep address ranges in an unambiguous state. The AddressRangesMap is used to normalize address ranges. The AddressRangesMap keeps address ranges and the value of the relocated address. For intersected range, it creates a united range that keeps the last inserted mapping value. The same for adjusted ranges. While it is OK to use the last inserted mapping value for intersected ranges (as there is no way how to resolve ambiguity) It is not OK to use the last inserted value for adjacent address ranges. Currently, two following address ranges are united into a single one: {0,24,17e685c} {24,d8,55afe20} -> {0,d8,55afe20} To avoid the problem, the AddressRangesMap should not unite adjacent address ranges with different relocated addresses. Instead, it should leave adjacent address ranges as separate ranges. So, the ranges should look like this: {0,24,17e685c} {24,d8,55afe20} Differential Revision: https://reviews.llvm.org/D142936 (cherry picked from commit 1e72920)
1 parent bec9a60 commit 56edf06

File tree

7 files changed

+398
-207
lines changed

7 files changed

+398
-207
lines changed

llvm/include/llvm/ADT/AddressRanges.h

Lines changed: 136 additions & 70 deletions
Original file line numberDiff line numberDiff line change
@@ -28,7 +28,11 @@ class AddressRange {
2828
uint64_t start() const { return Start; }
2929
uint64_t end() const { return End; }
3030
uint64_t size() const { return End - Start; }
31+
uint64_t empty() const { return size() == 0; }
3132
bool contains(uint64_t Addr) const { return Start <= Addr && Addr < End; }
33+
bool contains(const AddressRange &R) const {
34+
return Start <= R.Start && R.End <= End;
35+
}
3236
bool intersects(const AddressRange &R) const {
3337
return Start < R.End && R.Start < End;
3438
}
@@ -45,101 +49,163 @@ class AddressRange {
4549
uint64_t End = 0;
4650
};
4751

48-
/// The AddressRanges class helps normalize address range collections.
49-
/// This class keeps a sorted vector of AddressRange objects and can perform
50-
/// insertions and searches efficiently. The address ranges are always sorted
51-
/// and never contain any invalid or empty address ranges.
52-
/// Intersecting([100,200), [150,300)) and adjacent([100,200), [200,300))
53-
/// address ranges are combined during insertion.
54-
class AddressRanges {
52+
/// The AddressRangesBase class presents the base functionality for the
53+
/// normalized address ranges collection. This class keeps a sorted vector
54+
/// of AddressRange-like objects and can perform searches efficiently.
55+
/// The address ranges are always sorted and never contain any invalid,
56+
/// empty or intersected address ranges.
57+
58+
template <typename T> class AddressRangesBase {
5559
protected:
56-
using Collection = SmallVector<AddressRange>;
60+
using Collection = SmallVector<T>;
5761
Collection Ranges;
5862

5963
public:
6064
void clear() { Ranges.clear(); }
6165
bool empty() const { return Ranges.empty(); }
62-
bool contains(uint64_t Addr) const { return find(Addr) != Ranges.end(); }
66+
bool contains(uint64_t Addr) const {
67+
return find(Addr, Addr + 1) != Ranges.end();
68+
}
6369
bool contains(AddressRange Range) const {
64-
return find(Range) != Ranges.end();
70+
return find(Range.start(), Range.end()) != Ranges.end();
6571
}
66-
std::optional<AddressRange> getRangeThatContains(uint64_t Addr) const {
67-
Collection::const_iterator It = find(Addr);
72+
void reserve(size_t Capacity) { Ranges.reserve(Capacity); }
73+
size_t size() const { return Ranges.size(); }
74+
75+
std::optional<T> getRangeThatContains(uint64_t Addr) const {
76+
typename Collection::const_iterator It = find(Addr, Addr + 1);
6877
if (It == Ranges.end())
6978
return std::nullopt;
7079

7180
return *It;
7281
}
73-
Collection::const_iterator insert(AddressRange Range);
74-
void reserve(size_t Capacity) { Ranges.reserve(Capacity); }
75-
size_t size() const { return Ranges.size(); }
76-
bool operator==(const AddressRanges &RHS) const {
77-
return Ranges == RHS.Ranges;
78-
}
79-
const AddressRange &operator[](size_t i) const {
82+
83+
typename Collection::const_iterator begin() const { return Ranges.begin(); }
84+
typename Collection::const_iterator end() const { return Ranges.end(); }
85+
86+
const T &operator[](size_t i) const {
8087
assert(i < Ranges.size());
8188
return Ranges[i];
8289
}
83-
Collection::const_iterator begin() const { return Ranges.begin(); }
84-
Collection::const_iterator end() const { return Ranges.end(); }
90+
91+
bool operator==(const AddressRangesBase<T> &RHS) const {
92+
return Ranges == RHS.Ranges;
93+
}
8594

8695
protected:
87-
Collection::const_iterator find(uint64_t Addr) const;
88-
Collection::const_iterator find(AddressRange Range) const;
96+
typename Collection::const_iterator find(uint64_t Start, uint64_t End) const {
97+
if (Start >= End)
98+
return Ranges.end();
99+
100+
auto It =
101+
std::partition_point(Ranges.begin(), Ranges.end(), [=](const T &R) {
102+
return AddressRange(R).start() <= Start;
103+
});
104+
105+
if (It == Ranges.begin())
106+
return Ranges.end();
107+
108+
--It;
109+
if (End > AddressRange(*It).end())
110+
return Ranges.end();
111+
112+
return It;
113+
}
89114
};
90115

91-
/// AddressRangesMap class maps values to the address ranges.
92-
/// It keeps address ranges and corresponding values. If ranges
93-
/// are combined during insertion, then combined range keeps
94-
/// newly inserted value.
95-
template <typename T> class AddressRangesMap : protected AddressRanges {
116+
/// The AddressRanges class helps normalize address range collections.
117+
/// This class keeps a sorted vector of AddressRange objects and can perform
118+
/// insertions and searches efficiently. Intersecting([100,200), [150,300))
119+
/// and adjacent([100,200), [200,300)) address ranges are combined during
120+
/// insertion.
121+
class AddressRanges : public AddressRangesBase<AddressRange> {
96122
public:
97-
void clear() {
98-
Ranges.clear();
99-
Values.clear();
123+
Collection::const_iterator insert(AddressRange Range) {
124+
if (Range.empty())
125+
return Ranges.end();
126+
127+
auto It = llvm::upper_bound(Ranges, Range);
128+
auto It2 = It;
129+
while (It2 != Ranges.end() && It2->start() <= Range.end())
130+
++It2;
131+
if (It != It2) {
132+
Range = {Range.start(), std::max(Range.end(), std::prev(It2)->end())};
133+
It = Ranges.erase(It, It2);
134+
}
135+
if (It != Ranges.begin() && Range.start() <= std::prev(It)->end()) {
136+
--It;
137+
*It = {It->start(), std::max(It->end(), Range.end())};
138+
return It;
139+
}
140+
141+
return Ranges.insert(It, Range);
100142
}
101-
bool empty() const { return AddressRanges::empty(); }
102-
bool contains(uint64_t Addr) const { return AddressRanges::contains(Addr); }
103-
bool contains(AddressRange Range) const {
104-
return AddressRanges::contains(Range);
105-
}
106-
void insert(AddressRange Range, T Value) {
107-
size_t InputSize = Ranges.size();
108-
Collection::const_iterator RangesIt = AddressRanges::insert(Range);
109-
if (RangesIt == Ranges.end())
110-
return;
143+
};
111144

112-
// make Values match to Ranges.
113-
size_t Idx = RangesIt - Ranges.begin();
114-
typename ValuesCollection::iterator ValuesIt = Values.begin() + Idx;
115-
if (InputSize < Ranges.size())
116-
Values.insert(ValuesIt, T());
117-
else if (InputSize > Ranges.size())
118-
Values.erase(ValuesIt, ValuesIt + InputSize - Ranges.size());
119-
assert(Ranges.size() == Values.size());
120-
121-
// set value to the inserted or combined range.
122-
Values[Idx] = Value;
123-
}
124-
size_t size() const {
125-
assert(Ranges.size() == Values.size());
126-
return AddressRanges::size();
127-
}
128-
std::optional<std::pair<AddressRange, T>>
129-
getRangeValueThatContains(uint64_t Addr) const {
130-
Collection::const_iterator It = find(Addr);
131-
if (It == Ranges.end())
132-
return std::nullopt;
145+
class AddressRangeValuePair {
146+
public:
147+
operator AddressRange() const { return Range; }
133148

134-
return std::make_pair(*It, Values[It - Ranges.begin()]);
135-
}
136-
std::pair<AddressRange, T> operator[](size_t Idx) const {
137-
return std::make_pair(Ranges[Idx], Values[Idx]);
138-
}
149+
AddressRange Range;
150+
int64_t Value = 0;
151+
};
139152

140-
protected:
141-
using ValuesCollection = SmallVector<T>;
142-
ValuesCollection Values;
153+
inline bool operator==(const AddressRangeValuePair &LHS,
154+
const AddressRangeValuePair &RHS) {
155+
return LHS.Range == RHS.Range && LHS.Value == RHS.Value;
156+
}
157+
158+
/// AddressRangesMap class maps values to the address ranges.
159+
/// It keeps normalized address ranges and corresponding values.
160+
/// This class keeps a sorted vector of AddressRangeValuePair objects
161+
/// and can perform insertions and searches efficiently.
162+
/// Intersecting([100,200), [150,300)) ranges splitted into non-conflicting
163+
/// parts([100,200), [200,300)). Adjacent([100,200), [200,300)) address
164+
/// ranges are not combined during insertion.
165+
class AddressRangesMap : public AddressRangesBase<AddressRangeValuePair> {
166+
public:
167+
void insert(AddressRange Range, int64_t Value) {
168+
if (Range.empty())
169+
return;
170+
171+
// Search for range which is less than or equal incoming Range.
172+
auto It = std::partition_point(Ranges.begin(), Ranges.end(),
173+
[=](const AddressRangeValuePair &R) {
174+
return R.Range.start() <= Range.start();
175+
});
176+
177+
if (It != Ranges.begin())
178+
It--;
179+
180+
while (!Range.empty()) {
181+
// Inserted range does not overlap with any range.
182+
// Store it into the Ranges collection.
183+
if (It == Ranges.end() || Range.end() <= It->Range.start()) {
184+
Ranges.insert(It, {Range, Value});
185+
return;
186+
}
187+
188+
// Inserted range partially overlaps with current range.
189+
// Store not overlapped part of inserted range.
190+
if (Range.start() < It->Range.start()) {
191+
It = Ranges.insert(It, {{Range.start(), It->Range.start()}, Value});
192+
It++;
193+
Range = {It->Range.start(), Range.end()};
194+
continue;
195+
}
196+
197+
// Inserted range fully overlaps with current range.
198+
if (Range.end() <= It->Range.end())
199+
return;
200+
201+
// Inserted range partially overlaps with current range.
202+
// Remove overlapped part from the inserted range.
203+
if (Range.start() < It->Range.end())
204+
Range = {It->Range.end(), Range.end()};
205+
206+
It++;
207+
}
208+
}
143209
};
144210

145211
} // namespace llvm

llvm/include/llvm/DWARFLinker/DWARFLinkerCompileUnit.h

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -21,7 +21,7 @@ class DeclContext;
2121

2222
/// Mapped value in the address map is the offset to apply to the
2323
/// linked address.
24-
using RangesTy = AddressRangesMap<int64_t>;
24+
using RangesTy = AddressRangesMap;
2525

2626
// FIXME: Delete this structure.
2727
struct PatchLocation {

llvm/lib/DWARFLinker/DWARFLinker.cpp

Lines changed: 18 additions & 18 deletions
Original file line numberDiff line numberDiff line change
@@ -1659,7 +1659,7 @@ void DWARFLinker::patchRangesForUnit(const CompileUnit &Unit,
16591659
DWARFDataExtractor RangeExtractor(OrigDwarf.getDWARFObj(),
16601660
OrigDwarf.getDWARFObj().getRangesSection(),
16611661
OrigDwarf.isLittleEndian(), AddressSize);
1662-
std::optional<std::pair<AddressRange, int64_t>> CachedRange;
1662+
std::optional<AddressRangeValuePair> CachedRange;
16631663
DWARFUnit &OrigUnit = Unit.getOrigUnit();
16641664
auto OrigUnitDie = OrigUnit.getUnitDIE(false);
16651665
uint64_t UnitBaseAddress =
@@ -1687,9 +1687,9 @@ void DWARFLinker::patchRangesForUnit(const CompileUnit &Unit,
16871687
}
16881688

16891689
if (!CachedRange ||
1690-
!CachedRange->first.contains(Range.StartAddress + BaseAddress))
1691-
CachedRange = FunctionRanges.getRangeValueThatContains(
1692-
Range.StartAddress + BaseAddress);
1690+
!CachedRange->Range.contains(Range.StartAddress + BaseAddress))
1691+
CachedRange = FunctionRanges.getRangeThatContains(Range.StartAddress +
1692+
BaseAddress);
16931693

16941694
// All range entries should lie in the function range.
16951695
if (!CachedRange) {
@@ -1698,8 +1698,8 @@ void DWARFLinker::patchRangesForUnit(const CompileUnit &Unit,
16981698
}
16991699

17001700
LinkedRanges.insert(
1701-
{Range.StartAddress + BaseAddress + CachedRange->second,
1702-
Range.EndAddress + BaseAddress + CachedRange->second});
1701+
{Range.StartAddress + BaseAddress + CachedRange->Value,
1702+
Range.EndAddress + BaseAddress + CachedRange->Value});
17031703
}
17041704
}
17051705

@@ -1802,7 +1802,7 @@ void DWARFLinker::patchLineTableForUnit(CompileUnit &Unit,
18021802
// in NewRows.
18031803
std::vector<DWARFDebugLine::Row> Seq;
18041804
const auto &FunctionRanges = Unit.getFunctionRanges();
1805-
std::optional<std::pair<AddressRange, int64_t>> CurrRange;
1805+
std::optional<AddressRangeValuePair> CurrRange;
18061806

18071807
// FIXME: This logic is meant to generate exactly the same output as
18081808
// Darwin's classic dsymutil. There is a nicer way to implement this
@@ -1821,13 +1821,13 @@ void DWARFLinker::patchLineTableForUnit(CompileUnit &Unit,
18211821
// it is marked as end_sequence in the input (because in that
18221822
// case, the relocation offset is accurate and that entry won't
18231823
// serve as the start of another function).
1824-
if (!CurrRange || !CurrRange->first.contains(Row.Address.Address) ||
1825-
(Row.Address.Address == CurrRange->first.end() && !Row.EndSequence)) {
1824+
if (!CurrRange || !CurrRange->Range.contains(Row.Address.Address) ||
1825+
(Row.Address.Address == CurrRange->Range.end() && !Row.EndSequence)) {
18261826
// We just stepped out of a known range. Insert a end_sequence
18271827
// corresponding to the end of the range.
18281828
uint64_t StopAddress =
1829-
CurrRange ? CurrRange->first.end() + CurrRange->second : -1ULL;
1830-
CurrRange = FunctionRanges.getRangeValueThatContains(Row.Address.Address);
1829+
CurrRange ? CurrRange->Range.end() + CurrRange->Value : -1ULL;
1830+
CurrRange = FunctionRanges.getRangeThatContains(Row.Address.Address);
18311831
if (!CurrRange) {
18321832
if (StopAddress != -1ULL) {
18331833
// Try harder by looking in the Address ranges map.
@@ -1836,9 +1836,9 @@ void DWARFLinker::patchLineTableForUnit(CompileUnit &Unit,
18361836
// for now do as dsymutil.
18371837
// FIXME: Understand exactly what cases this addresses and
18381838
// potentially remove it along with the Ranges map.
1839-
if (std::optional<std::pair<AddressRange, int64_t>> Range =
1840-
Ranges.getRangeValueThatContains(Row.Address.Address))
1841-
StopAddress = Row.Address.Address + (*Range).second;
1839+
if (std::optional<AddressRangeValuePair> Range =
1840+
Ranges.getRangeThatContains(Row.Address.Address))
1841+
StopAddress = Row.Address.Address + (*Range).Value;
18421842
}
18431843
}
18441844
if (StopAddress != -1ULL && !Seq.empty()) {
@@ -1863,7 +1863,7 @@ void DWARFLinker::patchLineTableForUnit(CompileUnit &Unit,
18631863
continue;
18641864

18651865
// Relocate row address and add it to the current sequence.
1866-
Row.Address.Address += CurrRange->second;
1866+
Row.Address.Address += CurrRange->Value;
18671867
Seq.emplace_back(Row);
18681868

18691869
if (Row.EndSequence)
@@ -2002,8 +2002,8 @@ void DWARFLinker::patchFrameInfoForObject(const DWARFFile &File,
20022002
// the function entry point, thus we can't just lookup the address
20032003
// in the debug map. Use the AddressInfo's range map to see if the FDE
20042004
// describes something that we can relocate.
2005-
std::optional<std::pair<AddressRange, int64_t>> Range =
2006-
Ranges.getRangeValueThatContains(Loc);
2005+
std::optional<AddressRangeValuePair> Range =
2006+
Ranges.getRangeThatContains(Loc);
20072007
if (!Range) {
20082008
// The +4 is to account for the size of the InitialLength field itself.
20092009
InputOffset = EntryOffset + InitialLength + 4;
@@ -2032,7 +2032,7 @@ void DWARFLinker::patchFrameInfoForObject(const DWARFFile &File,
20322032
// fields that will get reconstructed by emitFDE().
20332033
unsigned FDERemainingBytes = InitialLength - (4 + AddrSize);
20342034
TheDwarfEmitter->emitFDE(IteratorInserted.first->getValue(), AddrSize,
2035-
Loc + Range->second,
2035+
Loc + Range->Value,
20362036
FrameData.substr(InputOffset, FDERemainingBytes));
20372037
InputOffset += FDERemainingBytes;
20382038
}

llvm/lib/DWARFLinker/DWARFStreamer.cpp

Lines changed: 2 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -402,10 +402,9 @@ void DwarfStreamer::emitUnitRangesEntries(CompileUnit &Unit,
402402
// Linked addresses might end up in a different order.
403403
// Build linked address ranges.
404404
AddressRanges LinkedRanges;
405-
for (size_t Idx = 0; Idx < FunctionRanges.size(); Idx++)
405+
for (const AddressRangeValuePair &Range : FunctionRanges)
406406
LinkedRanges.insert(
407-
{FunctionRanges[Idx].first.start() + FunctionRanges[Idx].second,
408-
FunctionRanges[Idx].first.end() + FunctionRanges[Idx].second});
407+
{Range.Range.start() + Range.Value, Range.Range.end() + Range.Value});
409408

410409
if (!FunctionRanges.empty())
411410
emitDwarfDebugArangesTable(Unit, LinkedRanges);

0 commit comments

Comments
 (0)