Skip to content
Merged
6 changes: 6 additions & 0 deletions bolt/include/bolt/Core/BinaryContext.h
Original file line number Diff line number Diff line change
Expand Up @@ -288,6 +288,12 @@ class BinaryContext {
/// overwritten, but it is okay to re-generate debug info for them.
std::set<const DWARFUnit *> ProcessedCUs;

/// DWARF-related container to manage lifecycle of groups of rows from line
/// tables associated with instructions. Since binary functions can span
/// multiple compilation units, instructions may reference debug line
/// information from multiple CUs.
ClusteredRowsContainer ClusteredRows;

// Setup MCPlus target builder
void initializeTarget(std::unique_ptr<MCPlusBuilder> TargetBuilder) {
MIB = std::move(TargetBuilder);
Expand Down
26 changes: 17 additions & 9 deletions bolt/include/bolt/Core/BinaryFunction.h
Original file line number Diff line number Diff line change
Expand Up @@ -35,6 +35,7 @@
#include "bolt/Core/JumpTable.h"
#include "bolt/Core/MCPlus.h"
#include "bolt/Utils/NameResolver.h"
#include "llvm/ADT/DenseMap.h"
#include "llvm/ADT/STLExtras.h"
#include "llvm/ADT/SmallString.h"
#include "llvm/ADT/StringRef.h"
Expand Down Expand Up @@ -423,8 +424,9 @@ class BinaryFunction {
/// Original LSDA type encoding
unsigned LSDATypeEncoding{dwarf::DW_EH_PE_omit};

/// Containing compilation unit for the function.
DWARFUnit *DwarfUnit{nullptr};
/// All compilation units this function belongs to.
/// Maps DWARF unit offset to the unit pointer.
DenseMap<uint64_t, DWARFUnit *> DwarfUnitMap;

/// Last computed hash value. Note that the value could be recomputed using
/// different parameters by every pass.
Expand Down Expand Up @@ -2409,15 +2411,21 @@ class BinaryFunction {
void
computeBlockHashes(HashFunction HashFunction = HashFunction::Default) const;

void setDWARFUnit(DWARFUnit *Unit) { DwarfUnit = Unit; }
void addDWARFUnit(DWARFUnit *Unit) { DwarfUnitMap[Unit->getOffset()] = Unit; }

/// Return DWARF compile unit for this function.
DWARFUnit *getDWARFUnit() const { return DwarfUnit; }
void removeDWARFUnit(DWARFUnit *Unit) {
DwarfUnitMap.erase(Unit->getOffset());
}

/// Return DWARF compile units for this function.
/// Returns a reference to the map of DWARF unit offsets to units.
const DenseMap<uint64_t, DWARFUnit *> &getDWARFUnits() const {
return DwarfUnitMap;
}

/// Return line info table for this function.
const DWARFDebugLine::LineTable *getDWARFLineTable() const {
return getDWARFUnit() ? BC.DwCtx->getLineTableForUnit(getDWARFUnit())
: nullptr;
const DWARFDebugLine::LineTable *
getDWARFLineTableForUnit(DWARFUnit *Unit) const {
return BC.DwCtx->getLineTableForUnit(Unit);
}

/// Finalize profile for the function.
Expand Down
113 changes: 92 additions & 21 deletions bolt/include/bolt/Core/DebugData.h
Original file line number Diff line number Diff line change
Expand Up @@ -135,8 +135,6 @@ struct DebugLineTableRowRef {
uint32_t DwCompileUnitIndex;
uint32_t RowIndex;

const static DebugLineTableRowRef NULL_ROW;

bool operator==(const DebugLineTableRowRef &Rhs) const {
return DwCompileUnitIndex == Rhs.DwCompileUnitIndex &&
RowIndex == Rhs.RowIndex;
Expand All @@ -145,24 +143,6 @@ struct DebugLineTableRowRef {
bool operator!=(const DebugLineTableRowRef &Rhs) const {
return !(*this == Rhs);
}

static DebugLineTableRowRef fromSMLoc(const SMLoc &Loc) {
union {
decltype(Loc.getPointer()) Ptr;
DebugLineTableRowRef Ref;
} U;
U.Ptr = Loc.getPointer();
return U.Ref;
}

SMLoc toSMLoc() const {
union {
decltype(SMLoc().getPointer()) Ptr;
DebugLineTableRowRef Ref;
} U;
U.Ref = *this;
return SMLoc::getFromPointer(U.Ptr);
}
};

/// Common buffer vector used for debug info handling.
Expand Down Expand Up @@ -210,7 +190,7 @@ class DebugRangesSectionWriter {
static bool classof(const DebugRangesSectionWriter *Writer) {
return Writer->getKind() == RangesWriterKind::DebugRangesWriter;
}

/// Append a range to the main buffer.
void appendToRangeBuffer(const DebugBufferVector &CUBuffer);

Expand Down Expand Up @@ -852,6 +832,97 @@ class DwarfLineTable {
// Returns DWARF Version for this line table.
uint16_t getDwarfVersion() const { return DwarfVersion; }
};

/// ClusteredRows represents a collection of debug line table row references.
///
/// MEMORY LAYOUT AND DESIGN:
Copy link
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Is this actually necessary? Why not just have a SmallVector/std::vector that stores all the elements?

Copy link
Member Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

The original code used a very hacky approach by reinterpreting a pointer as a struct. I assumed this was done to minimize memory usage. Therefore, I tried to keep any regression in memory consumption and performance—caused by extra pointer dereferencing—to a minimum.

Copy link
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Thanks, @grigorypas! Did you measure build time and memory consumption before/after the change?

Copy link
Member Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Tested on a large internal service at Meta. There was no noticeable difference in memory consumption and runtime.

/// This class uses a flexible array member pattern to store all
/// DebugLineTableRowRef elements in a single contiguous memory allocation.
/// The memory layout is:
///
/// +------------------+
/// | ClusteredRows | <- Object header (Size + first element)
/// | - Size |
/// | - Rows (element) | <- First DebugLineTableRowRef element
/// +------------------+
/// | element[1] | <- Additional DebugLineTableRowRef elements
/// | element[2] | stored immediately after the object
/// | ... |
/// | element[Size-1] |
/// +------------------+
///
/// The 'Rows' member serves as both the first element storage and the base
/// address for pointer arithmetic to access subsequent elements.
class ClusteredRows {
public:
ArrayRef<DebugLineTableRowRef> getRows() const {
return ArrayRef<DebugLineTableRowRef>(beginPtrConst(), Size);
}

/// Returns the number of elements in the array.
uint64_t size() const { return Size; }

/// We re-purpose SMLoc inside MCInst to store the pointer
/// to ClusteredRows. fromSMLoc() and toSMLoc() are helper
/// functions to convert between SMLoc and ClusteredRows.

static const ClusteredRows *fromSMLoc(const SMLoc &Loc) {
return reinterpret_cast<const ClusteredRows *>(Loc.getPointer());
}
SMLoc toSMLoc() const {
return SMLoc::getFromPointer(reinterpret_cast<const char *>(this));
Copy link
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

reinterpret_cast

Copy link
Member Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

We cannot cast directly to SMLoc. SMLoc::getFromPointer is the class method to cast from const char*, so it should be future proof.

}

/// Given a vector of DebugLineTableRowRef, this method
/// copies the elements into pre-allocated memory.
template <typename T> void populate(const T Vec) {
assert(Vec.size() == Size && "Sizes must match");
DebugLineTableRowRef *CurRawPtr = beginPtr();
for (DebugLineTableRowRef RowRef : Vec) {
*CurRawPtr = RowRef;
++CurRawPtr;
}
}

private:
uint64_t Size;
DebugLineTableRowRef Rows;

ClusteredRows(uint64_t Size) : Size(Size) {}

/// Total size of the object including the array.
static uint64_t getTotalSize(uint64_t Size) {
assert(Size > 0 && "Size must be greater than 0");
return sizeof(ClusteredRows) + (Size - 1) * sizeof(DebugLineTableRowRef);
}
const DebugLineTableRowRef *beginPtrConst() const {
return reinterpret_cast<const DebugLineTableRowRef *>(&Rows);
}
DebugLineTableRowRef *beginPtr() {
return reinterpret_cast<DebugLineTableRowRef *>(&Rows);
}

friend class ClusteredRowsContainer;
};

/// ClusteredRowsContainer manages the lifecycle of ClusteredRows objects.
class ClusteredRowsContainer {
public:
ClusteredRows *createClusteredRows(uint64_t Size) {
auto *CR = new (std::malloc(ClusteredRows::getTotalSize(Size)))
ClusteredRows(Size);
Clusters.push_back(CR);
return CR;
}
~ClusteredRowsContainer() {
for (auto *CR : Clusters)
std::free(CR);
}

private:
std::vector<ClusteredRows *> Clusters;
};

} // namespace bolt
} // namespace llvm

Expand Down
55 changes: 36 additions & 19 deletions bolt/lib/Core/BinaryContext.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -1693,22 +1693,39 @@ void BinaryContext::preprocessDebugInfo() {

auto It = llvm::partition_point(
AllRanges, [=](CURange R) { return R.HighPC <= FunctionAddress; });
if (It != AllRanges.end() && It->LowPC <= FunctionAddress)
Function.setDWARFUnit(It->Unit);
if (It == AllRanges.end() || It->LowPC > FunctionAddress) {
continue;
}
Function.addDWARFUnit(It->Unit);

// Go forward and add all units from ranges that cover the function.
while (++It != AllRanges.end()) {
if (It->LowPC > FunctionAddress || FunctionAddress >= It->HighPC)
break;
Function.addDWARFUnit(It->Unit);
}
}

// Discover units with debug info that needs to be updated.
for (const auto &KV : BinaryFunctions) {
const BinaryFunction &BF = KV.second;
if (shouldEmit(BF) && BF.getDWARFUnit())
ProcessedCUs.insert(BF.getDWARFUnit());
if (shouldEmit(BF) && !BF.getDWARFUnits().empty())
for (const auto &[_, Unit] : BF.getDWARFUnits())
ProcessedCUs.insert(Unit);
}

// Clear debug info for functions from units that we are not going to process.
for (auto &KV : BinaryFunctions) {
Copy link
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Seems like these two loops can be combined into one?

Copy link
Member Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

We need two passes. If CU is not added to ProcessedCUs because BF is skipped, it does not mean that this CU must be removed. There might be another BF that belongs to that CU and needs processing.

BinaryFunction &BF = KV.second;
if (BF.getDWARFUnit() && !ProcessedCUs.count(BF.getDWARFUnit()))
BF.setDWARFUnit(nullptr);
// Collect units to remove to avoid iterator invalidation
SmallVector<DWARFUnit *, 1> UnitsToRemove;
for (const auto &[_, Unit] : BF.getDWARFUnits()) {
if (!ProcessedCUs.count(Unit))
UnitsToRemove.push_back(Unit);
}
// Remove the collected units
for (auto *Unit : UnitsToRemove) {
BF.removeDWARFUnit(Unit);
}
}

if (opts::Verbosity >= 1) {
Expand Down Expand Up @@ -1903,23 +1920,23 @@ bool BinaryContext::isMarker(const SymbolRef &Symbol) const {
static void printDebugInfo(raw_ostream &OS, const MCInst &Instruction,
const BinaryFunction *Function,
DWARFContext *DwCtx) {
DebugLineTableRowRef RowRef =
DebugLineTableRowRef::fromSMLoc(Instruction.getLoc());
if (RowRef == DebugLineTableRowRef::NULL_ROW)
const ClusteredRows *LineTableRows =
ClusteredRows::fromSMLoc(Instruction.getLoc());
if (LineTableRows == nullptr)
return;

const DWARFDebugLine::LineTable *LineTable;
if (Function && Function->getDWARFUnit() &&
Function->getDWARFUnit()->getOffset() == RowRef.DwCompileUnitIndex) {
LineTable = Function->getDWARFLineTable();
} else {
LineTable = DwCtx->getLineTableForUnit(
DwCtx->getCompileUnitForOffset(RowRef.DwCompileUnitIndex));
}
assert(LineTable && "line table expected for instruction with debug info");
// File name and line number should be the same for all CUs.
// So it is sufficient to check the first one.
DebugLineTableRowRef RowRef = LineTableRows->getRows().front();
const DWARFDebugLine::LineTable *LineTable = DwCtx->getLineTableForUnit(
DwCtx->getCompileUnitForOffset(RowRef.DwCompileUnitIndex));

if (!LineTable)
return;

const DWARFDebugLine::Row &Row = LineTable->Rows[RowRef.RowIndex - 1];
StringRef FileName = "";

if (std::optional<const char *> FName =
dwarf::toString(LineTable->Prologue.getFileNameEntry(Row.File).Name))
FileName = *FName;
Expand Down
Loading
Loading