-
Notifications
You must be signed in to change notification settings - Fork 15.2k
[BOLT] Fix debug line emission for functions in multiple compilation units #151230
New issue
Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.
By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.
Already on GitHub? Sign in to your account
Changes from all commits
e95394d
53146d8
b1d229a
15ef19f
b2297d0
76fabbd
13e82ab
63ee4a3
1c08371
e80b4aa
c3a6c8b
7552b8f
File filter
Filter by extension
Conversations
Jump to
Diff view
Diff view
There are no files selected for viewing
| Original file line number | Diff line number | Diff line change |
|---|---|---|
|
|
@@ -135,8 +135,6 @@ struct DebugLineTableRowRef { | |
| uint32_t DwCompileUnitIndex; | ||
| uint32_t RowIndex; | ||
|
|
||
| const static DebugLineTableRowRef NULL_ROW; | ||
|
|
||
| bool operator==(const DebugLineTableRowRef &Rhs) const { | ||
| return DwCompileUnitIndex == Rhs.DwCompileUnitIndex && | ||
| RowIndex == Rhs.RowIndex; | ||
|
|
@@ -145,24 +143,6 @@ struct DebugLineTableRowRef { | |
| bool operator!=(const DebugLineTableRowRef &Rhs) const { | ||
| return !(*this == Rhs); | ||
| } | ||
|
|
||
| static DebugLineTableRowRef fromSMLoc(const SMLoc &Loc) { | ||
| union { | ||
| decltype(Loc.getPointer()) Ptr; | ||
| DebugLineTableRowRef Ref; | ||
| } U; | ||
| U.Ptr = Loc.getPointer(); | ||
| return U.Ref; | ||
| } | ||
|
|
||
| SMLoc toSMLoc() const { | ||
| union { | ||
| decltype(SMLoc().getPointer()) Ptr; | ||
| DebugLineTableRowRef Ref; | ||
| } U; | ||
| U.Ref = *this; | ||
| return SMLoc::getFromPointer(U.Ptr); | ||
| } | ||
| }; | ||
|
|
||
| /// Common buffer vector used for debug info handling. | ||
|
|
@@ -210,7 +190,7 @@ class DebugRangesSectionWriter { | |
| static bool classof(const DebugRangesSectionWriter *Writer) { | ||
| return Writer->getKind() == RangesWriterKind::DebugRangesWriter; | ||
| } | ||
|
|
||
| /// Append a range to the main buffer. | ||
| void appendToRangeBuffer(const DebugBufferVector &CUBuffer); | ||
|
|
||
|
|
@@ -852,6 +832,97 @@ class DwarfLineTable { | |
| // Returns DWARF Version for this line table. | ||
| uint16_t getDwarfVersion() const { return DwarfVersion; } | ||
| }; | ||
|
|
||
| /// ClusteredRows represents a collection of debug line table row references. | ||
| /// | ||
| /// MEMORY LAYOUT AND DESIGN: | ||
| /// This class uses a flexible array member pattern to store all | ||
| /// DebugLineTableRowRef elements in a single contiguous memory allocation. | ||
| /// The memory layout is: | ||
| /// | ||
| /// +------------------+ | ||
| /// | ClusteredRows | <- Object header (Size + first element) | ||
| /// | - Size | | ||
| /// | - Rows (element) | <- First DebugLineTableRowRef element | ||
| /// +------------------+ | ||
| /// | element[1] | <- Additional DebugLineTableRowRef elements | ||
| /// | element[2] | stored immediately after the object | ||
| /// | ... | | ||
| /// | element[Size-1] | | ||
| /// +------------------+ | ||
| /// | ||
| /// The 'Rows' member serves as both the first element storage and the base | ||
| /// address for pointer arithmetic to access subsequent elements. | ||
| class ClusteredRows { | ||
| public: | ||
grigorypas marked this conversation as resolved.
Outdated
Show resolved
Hide resolved
|
||
| ArrayRef<DebugLineTableRowRef> getRows() const { | ||
| return ArrayRef<DebugLineTableRowRef>(beginPtrConst(), Size); | ||
| } | ||
|
|
||
| /// Returns the number of elements in the array. | ||
| uint64_t size() const { return Size; } | ||
|
|
||
| /// We re-purpose SMLoc inside MCInst to store the pointer | ||
| /// to ClusteredRows. fromSMLoc() and toSMLoc() are helper | ||
| /// functions to convert between SMLoc and ClusteredRows. | ||
|
|
||
| static const ClusteredRows *fromSMLoc(const SMLoc &Loc) { | ||
| return reinterpret_cast<const ClusteredRows *>(Loc.getPointer()); | ||
| } | ||
| SMLoc toSMLoc() const { | ||
| return SMLoc::getFromPointer(reinterpret_cast<const char *>(this)); | ||
|
||
| } | ||
|
|
||
| /// Given a vector of DebugLineTableRowRef, this method | ||
| /// copies the elements into pre-allocated memory. | ||
| template <typename T> void populate(const T Vec) { | ||
| assert(Vec.size() == Size && "Sizes must match"); | ||
| DebugLineTableRowRef *CurRawPtr = beginPtr(); | ||
| for (DebugLineTableRowRef RowRef : Vec) { | ||
| *CurRawPtr = RowRef; | ||
| ++CurRawPtr; | ||
| } | ||
| } | ||
|
|
||
| private: | ||
| uint64_t Size; | ||
| DebugLineTableRowRef Rows; | ||
|
|
||
| ClusteredRows(uint64_t Size) : Size(Size) {} | ||
|
|
||
| /// Total size of the object including the array. | ||
| static uint64_t getTotalSize(uint64_t Size) { | ||
| assert(Size > 0 && "Size must be greater than 0"); | ||
| return sizeof(ClusteredRows) + (Size - 1) * sizeof(DebugLineTableRowRef); | ||
| } | ||
| const DebugLineTableRowRef *beginPtrConst() const { | ||
| return reinterpret_cast<const DebugLineTableRowRef *>(&Rows); | ||
| } | ||
| DebugLineTableRowRef *beginPtr() { | ||
| return reinterpret_cast<DebugLineTableRowRef *>(&Rows); | ||
| } | ||
|
|
||
| friend class ClusteredRowsContainer; | ||
| }; | ||
|
|
||
| /// ClusteredRowsContainer manages the lifecycle of ClusteredRows objects. | ||
| class ClusteredRowsContainer { | ||
| public: | ||
| ClusteredRows *createClusteredRows(uint64_t Size) { | ||
| auto *CR = new (std::malloc(ClusteredRows::getTotalSize(Size))) | ||
| ClusteredRows(Size); | ||
| Clusters.push_back(CR); | ||
| return CR; | ||
| } | ||
| ~ClusteredRowsContainer() { | ||
| for (auto *CR : Clusters) | ||
| std::free(CR); | ||
| } | ||
|
|
||
| private: | ||
| std::vector<ClusteredRows *> Clusters; | ||
| }; | ||
|
|
||
| } // namespace bolt | ||
| } // namespace llvm | ||
|
|
||
|
|
||
| Original file line number | Diff line number | Diff line change |
|---|---|---|
|
|
@@ -1693,22 +1693,39 @@ void BinaryContext::preprocessDebugInfo() { | |
|
|
||
| auto It = llvm::partition_point( | ||
| AllRanges, [=](CURange R) { return R.HighPC <= FunctionAddress; }); | ||
| if (It != AllRanges.end() && It->LowPC <= FunctionAddress) | ||
| Function.setDWARFUnit(It->Unit); | ||
| if (It == AllRanges.end() || It->LowPC > FunctionAddress) { | ||
| continue; | ||
| } | ||
| Function.addDWARFUnit(It->Unit); | ||
|
|
||
| // Go forward and add all units from ranges that cover the function. | ||
| while (++It != AllRanges.end()) { | ||
| if (It->LowPC > FunctionAddress || FunctionAddress >= It->HighPC) | ||
| break; | ||
| Function.addDWARFUnit(It->Unit); | ||
| } | ||
| } | ||
|
|
||
| // Discover units with debug info that needs to be updated. | ||
| for (const auto &KV : BinaryFunctions) { | ||
| const BinaryFunction &BF = KV.second; | ||
| if (shouldEmit(BF) && BF.getDWARFUnit()) | ||
| ProcessedCUs.insert(BF.getDWARFUnit()); | ||
| if (shouldEmit(BF) && !BF.getDWARFUnits().empty()) | ||
| for (const auto &[_, Unit] : BF.getDWARFUnits()) | ||
| ProcessedCUs.insert(Unit); | ||
| } | ||
|
|
||
| // Clear debug info for functions from units that we are not going to process. | ||
| for (auto &KV : BinaryFunctions) { | ||
|
||
| BinaryFunction &BF = KV.second; | ||
| if (BF.getDWARFUnit() && !ProcessedCUs.count(BF.getDWARFUnit())) | ||
| BF.setDWARFUnit(nullptr); | ||
| // Collect units to remove to avoid iterator invalidation | ||
| SmallVector<DWARFUnit *, 1> UnitsToRemove; | ||
| for (const auto &[_, Unit] : BF.getDWARFUnits()) { | ||
| if (!ProcessedCUs.count(Unit)) | ||
| UnitsToRemove.push_back(Unit); | ||
| } | ||
| // Remove the collected units | ||
| for (auto *Unit : UnitsToRemove) { | ||
| BF.removeDWARFUnit(Unit); | ||
| } | ||
| } | ||
|
|
||
| if (opts::Verbosity >= 1) { | ||
|
|
@@ -1903,23 +1920,23 @@ bool BinaryContext::isMarker(const SymbolRef &Symbol) const { | |
| static void printDebugInfo(raw_ostream &OS, const MCInst &Instruction, | ||
| const BinaryFunction *Function, | ||
| DWARFContext *DwCtx) { | ||
| DebugLineTableRowRef RowRef = | ||
| DebugLineTableRowRef::fromSMLoc(Instruction.getLoc()); | ||
| if (RowRef == DebugLineTableRowRef::NULL_ROW) | ||
| const ClusteredRows *LineTableRows = | ||
| ClusteredRows::fromSMLoc(Instruction.getLoc()); | ||
| if (LineTableRows == nullptr) | ||
| return; | ||
|
|
||
| const DWARFDebugLine::LineTable *LineTable; | ||
| if (Function && Function->getDWARFUnit() && | ||
| Function->getDWARFUnit()->getOffset() == RowRef.DwCompileUnitIndex) { | ||
| LineTable = Function->getDWARFLineTable(); | ||
| } else { | ||
| LineTable = DwCtx->getLineTableForUnit( | ||
| DwCtx->getCompileUnitForOffset(RowRef.DwCompileUnitIndex)); | ||
| } | ||
| assert(LineTable && "line table expected for instruction with debug info"); | ||
| // File name and line number should be the same for all CUs. | ||
| // So it is sufficient to check the first one. | ||
| DebugLineTableRowRef RowRef = LineTableRows->getRows().front(); | ||
| const DWARFDebugLine::LineTable *LineTable = DwCtx->getLineTableForUnit( | ||
| DwCtx->getCompileUnitForOffset(RowRef.DwCompileUnitIndex)); | ||
|
|
||
| if (!LineTable) | ||
| return; | ||
|
|
||
| const DWARFDebugLine::Row &Row = LineTable->Rows[RowRef.RowIndex - 1]; | ||
| StringRef FileName = ""; | ||
|
|
||
| if (std::optional<const char *> FName = | ||
| dwarf::toString(LineTable->Prologue.getFileNameEntry(Row.File).Name)) | ||
| FileName = *FName; | ||
|
|
||
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
Is this actually necessary? Why not just have a SmallVector/std::vector that stores all the elements?
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
The original code used a very hacky approach by reinterpreting a pointer as a struct. I assumed this was done to minimize memory usage. Therefore, I tried to keep any regression in memory consumption and performance—caused by extra pointer dereferencing—to a minimum.
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
Thanks, @grigorypas! Did you measure build time and memory consumption before/after the change?
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
Tested on a large internal service at Meta. There was no noticeable difference in memory consumption and runtime.