diff --git a/bolt/include/bolt/Core/BinaryContext.h b/bolt/include/bolt/Core/BinaryContext.h index 91ecf89da618c..72c8817daa714 100644 --- a/bolt/include/bolt/Core/BinaryContext.h +++ b/bolt/include/bolt/Core/BinaryContext.h @@ -288,6 +288,12 @@ class BinaryContext { /// overwritten, but it is okay to re-generate debug info for them. std::set ProcessedCUs; + /// DWARF-related container to manage lifecycle of groups of rows from line + /// tables associated with instructions. Since binary functions can span + /// multiple compilation units, instructions may reference debug line + /// information from multiple CUs. + ClusteredRowsContainer ClusteredRows; + // Setup MCPlus target builder void initializeTarget(std::unique_ptr TargetBuilder) { MIB = std::move(TargetBuilder); diff --git a/bolt/include/bolt/Core/BinaryFunction.h b/bolt/include/bolt/Core/BinaryFunction.h index b59926cc75571..51b139a15e1a0 100644 --- a/bolt/include/bolt/Core/BinaryFunction.h +++ b/bolt/include/bolt/Core/BinaryFunction.h @@ -35,6 +35,7 @@ #include "bolt/Core/JumpTable.h" #include "bolt/Core/MCPlus.h" #include "bolt/Utils/NameResolver.h" +#include "llvm/ADT/DenseMap.h" #include "llvm/ADT/STLExtras.h" #include "llvm/ADT/SmallString.h" #include "llvm/ADT/StringRef.h" @@ -423,8 +424,9 @@ class BinaryFunction { /// Original LSDA type encoding unsigned LSDATypeEncoding{dwarf::DW_EH_PE_omit}; - /// Containing compilation unit for the function. - DWARFUnit *DwarfUnit{nullptr}; + /// All compilation units this function belongs to. + /// Maps DWARF unit offset to the unit pointer. + DenseMap DwarfUnitMap; /// Last computed hash value. Note that the value could be recomputed using /// different parameters by every pass. 
@@ -2409,15 +2411,21 @@ class BinaryFunction { void computeBlockHashes(HashFunction HashFunction = HashFunction::Default) const; - void setDWARFUnit(DWARFUnit *Unit) { DwarfUnit = Unit; } + void addDWARFUnit(DWARFUnit *Unit) { DwarfUnitMap[Unit->getOffset()] = Unit; } - /// Return DWARF compile unit for this function. - DWARFUnit *getDWARFUnit() const { return DwarfUnit; } + void removeDWARFUnit(DWARFUnit *Unit) { + DwarfUnitMap.erase(Unit->getOffset()); + } + + /// Return DWARF compile units for this function. + /// Returns a reference to the map of DWARF unit offsets to units. + const DenseMap &getDWARFUnits() const { + return DwarfUnitMap; + } - /// Return line info table for this function. - const DWARFDebugLine::LineTable *getDWARFLineTable() const { - return getDWARFUnit() ? BC.DwCtx->getLineTableForUnit(getDWARFUnit()) - : nullptr; + const DWARFDebugLine::LineTable * + getDWARFLineTableForUnit(DWARFUnit *Unit) const { + return BC.DwCtx->getLineTableForUnit(Unit); } /// Finalize profile for the function. 
diff --git a/bolt/include/bolt/Core/DebugData.h b/bolt/include/bolt/Core/DebugData.h index 6ea3b1af1024f..814978965ce3a 100644 --- a/bolt/include/bolt/Core/DebugData.h +++ b/bolt/include/bolt/Core/DebugData.h @@ -135,8 +135,6 @@ struct DebugLineTableRowRef { uint32_t DwCompileUnitIndex; uint32_t RowIndex; - const static DebugLineTableRowRef NULL_ROW; - bool operator==(const DebugLineTableRowRef &Rhs) const { return DwCompileUnitIndex == Rhs.DwCompileUnitIndex && RowIndex == Rhs.RowIndex; @@ -145,24 +143,6 @@ struct DebugLineTableRowRef { bool operator!=(const DebugLineTableRowRef &Rhs) const { return !(*this == Rhs); } - - static DebugLineTableRowRef fromSMLoc(const SMLoc &Loc) { - union { - decltype(Loc.getPointer()) Ptr; - DebugLineTableRowRef Ref; - } U; - U.Ptr = Loc.getPointer(); - return U.Ref; - } - - SMLoc toSMLoc() const { - union { - decltype(SMLoc().getPointer()) Ptr; - DebugLineTableRowRef Ref; - } U; - U.Ref = *this; - return SMLoc::getFromPointer(U.Ptr); - } }; /// Common buffer vector used for debug info handling. @@ -210,7 +190,7 @@ class DebugRangesSectionWriter { static bool classof(const DebugRangesSectionWriter *Writer) { return Writer->getKind() == RangesWriterKind::DebugRangesWriter; } - + /// Append a range to the main buffer. void appendToRangeBuffer(const DebugBufferVector &CUBuffer); @@ -852,6 +832,97 @@ class DwarfLineTable { // Returns DWARF Version for this line table. uint16_t getDwarfVersion() const { return DwarfVersion; } }; + +/// ClusteredRows represents a collection of debug line table row references. +/// +/// MEMORY LAYOUT AND DESIGN: +/// This class uses a flexible array member pattern to store all +/// DebugLineTableRowRef elements in a single contiguous memory allocation. 
+/// The memory layout is: +/// +/// +------------------+ +/// | ClusteredRows | <- Object header (Size + first element) +/// | - Size | +/// | - Rows (element) | <- First DebugLineTableRowRef element +/// +------------------+ +/// | element[1] | <- Additional DebugLineTableRowRef elements +/// | element[2] | stored immediately after the object +/// | ... | +/// | element[Size-1] | +/// +------------------+ +/// +/// The 'Rows' member serves as both the first element storage and the base +/// address for pointer arithmetic to access subsequent elements. +class ClusteredRows { +public: + ArrayRef getRows() const { + return ArrayRef(beginPtrConst(), Size); + } + + /// Returns the number of elements in the array. + uint64_t size() const { return Size; } + + /// We re-purpose SMLoc inside MCInst to store the pointer + /// to ClusteredRows. fromSMLoc() and toSMLoc() are helper + /// functions to convert between SMLoc and ClusteredRows. + + static const ClusteredRows *fromSMLoc(const SMLoc &Loc) { + return reinterpret_cast(Loc.getPointer()); + } + SMLoc toSMLoc() const { + return SMLoc::getFromPointer(reinterpret_cast(this)); + } + + /// Copies the DebugLineTableRowRef elements of \p Vec, in order, into the + /// pre-allocated storage starting at beginPtr(); Vec.size() must equal Size. + template void populate(const T &Vec) { + assert(Vec.size() == Size && "Sizes must match"); + DebugLineTableRowRef *CurRawPtr = beginPtr(); + for (DebugLineTableRowRef RowRef : Vec) { + *CurRawPtr = RowRef; + ++CurRawPtr; + } + } + +private: + uint64_t Size; + DebugLineTableRowRef Rows; + + ClusteredRows(uint64_t Size) : Size(Size) {} + + /// Total size of the object including the array.
+ static uint64_t getTotalSize(uint64_t Size) { + assert(Size > 0 && "Size must be greater than 0"); + return sizeof(ClusteredRows) + (Size - 1) * sizeof(DebugLineTableRowRef); + } + const DebugLineTableRowRef *beginPtrConst() const { + return &Rows; + } + DebugLineTableRowRef *beginPtr() { + return &Rows; + } + + friend class ClusteredRowsContainer; +}; + +/// ClusteredRowsContainer manages the lifecycle of ClusteredRows objects. +class ClusteredRowsContainer { +public: + ClusteredRows *createClusteredRows(uint64_t Size) { + auto *CR = new (std::malloc(ClusteredRows::getTotalSize(Size))) + ClusteredRows(Size); + Clusters.push_back(CR); + return CR; + } + ~ClusteredRowsContainer() { + for (auto *CR : Clusters) + std::free(CR); + } + +private: + std::vector Clusters; +}; + } // namespace bolt } // namespace llvm diff --git a/bolt/lib/Core/BinaryContext.cpp b/bolt/lib/Core/BinaryContext.cpp index dd0d041692484..e69f0c01d3d20 100644 --- a/bolt/lib/Core/BinaryContext.cpp +++ b/bolt/lib/Core/BinaryContext.cpp @@ -1693,22 +1693,39 @@ void BinaryContext::preprocessDebugInfo() { auto It = llvm::partition_point( AllRanges, [=](CURange R) { return R.HighPC <= FunctionAddress; }); - if (It != AllRanges.end() && It->LowPC <= FunctionAddress) - Function.setDWARFUnit(It->Unit); + if (It == AllRanges.end() || It->LowPC > FunctionAddress) { + continue; + } + Function.addDWARFUnit(It->Unit); + + // Go forward and add all units from ranges that cover the function. + while (++It != AllRanges.end()) { + if (It->LowPC > FunctionAddress || FunctionAddress >= It->HighPC) + break; + Function.addDWARFUnit(It->Unit); + } } // Discover units with debug info that needs to be updated.
for (const auto &KV : BinaryFunctions) { const BinaryFunction &BF = KV.second; - if (shouldEmit(BF) && BF.getDWARFUnit()) - ProcessedCUs.insert(BF.getDWARFUnit()); + if (shouldEmit(BF) && !BF.getDWARFUnits().empty()) + for (const auto &[_, Unit] : BF.getDWARFUnits()) + ProcessedCUs.insert(Unit); } - // Clear debug info for functions from units that we are not going to process. for (auto &KV : BinaryFunctions) { BinaryFunction &BF = KV.second; - if (BF.getDWARFUnit() && !ProcessedCUs.count(BF.getDWARFUnit())) - BF.setDWARFUnit(nullptr); + // Collect units to remove to avoid iterator invalidation + SmallVector UnitsToRemove; + for (const auto &[_, Unit] : BF.getDWARFUnits()) { + if (!ProcessedCUs.count(Unit)) + UnitsToRemove.push_back(Unit); + } + // Remove the collected units + for (auto *Unit : UnitsToRemove) { + BF.removeDWARFUnit(Unit); + } } if (opts::Verbosity >= 1) { @@ -1903,23 +1920,23 @@ bool BinaryContext::isMarker(const SymbolRef &Symbol) const { static void printDebugInfo(raw_ostream &OS, const MCInst &Instruction, const BinaryFunction *Function, DWARFContext *DwCtx) { - DebugLineTableRowRef RowRef = - DebugLineTableRowRef::fromSMLoc(Instruction.getLoc()); - if (RowRef == DebugLineTableRowRef::NULL_ROW) + const ClusteredRows *LineTableRows = + ClusteredRows::fromSMLoc(Instruction.getLoc()); + if (LineTableRows == nullptr) return; - const DWARFDebugLine::LineTable *LineTable; - if (Function && Function->getDWARFUnit() && - Function->getDWARFUnit()->getOffset() == RowRef.DwCompileUnitIndex) { - LineTable = Function->getDWARFLineTable(); - } else { - LineTable = DwCtx->getLineTableForUnit( - DwCtx->getCompileUnitForOffset(RowRef.DwCompileUnitIndex)); - } - assert(LineTable && "line table expected for instruction with debug info"); + // File name and line number should be the same for all CUs. + // So it is sufficient to check the first one. 
+ DebugLineTableRowRef RowRef = LineTableRows->getRows().front(); + const DWARFDebugLine::LineTable *LineTable = DwCtx->getLineTableForUnit( + DwCtx->getCompileUnitForOffset(RowRef.DwCompileUnitIndex)); + + if (!LineTable) + return; const DWARFDebugLine::Row &Row = LineTable->Rows[RowRef.RowIndex - 1]; StringRef FileName = ""; + if (std::optional FName = dwarf::toString(LineTable->Prologue.getFileNameEntry(Row.File).Name)) FileName = *FName; diff --git a/bolt/lib/Core/BinaryEmitter.cpp b/bolt/lib/Core/BinaryEmitter.cpp index 7b5cd276fee89..7aaf721da9769 100644 --- a/bolt/lib/Core/BinaryEmitter.cpp +++ b/bolt/lib/Core/BinaryEmitter.cpp @@ -177,7 +177,8 @@ class BinaryEmitter { /// Note that it does not automatically result in the insertion of the EOS /// marker in the line table program, but provides one to the DWARF generator /// when it needs it. - void emitLineInfoEnd(const BinaryFunction &BF, MCSymbol *FunctionEndSymbol); + void emitLineInfoEnd(const BinaryFunction &BF, MCSymbol *FunctionEndSymbol, + const DWARFUnit &Unit); /// Emit debug line info for unprocessed functions from CUs that include /// emitted functions. @@ -436,8 +437,9 @@ bool BinaryEmitter::emitFunction(BinaryFunction &Function, Streamer.emitELFSize(StartSymbol, SizeExpr); } - if (opts::UpdateDebugSections && Function.getDWARFUnit()) - emitLineInfoEnd(Function, EndSymbol); + if (opts::UpdateDebugSections && !Function.getDWARFUnits().empty()) + for (const auto &[_, Unit] : Function.getDWARFUnits()) + emitLineInfoEnd(Function, EndSymbol, *Unit); // Exception handling info for the function. emitLSDA(Function, FF); @@ -486,7 +488,7 @@ void BinaryEmitter::emitFunctionBody(BinaryFunction &BF, FunctionFragment &FF, // A symbol to be emitted before the instruction to mark its location. 
MCSymbol *InstrLabel = BC.MIB->getInstLabel(Instr); - if (opts::UpdateDebugSections && BF.getDWARFUnit()) { + if (opts::UpdateDebugSections && !BF.getDWARFUnits().empty()) { LastLocSeen = emitLineInfo(BF, Instr.getLoc(), LastLocSeen, FirstInstr, InstrLabel); FirstInstr = false; @@ -679,74 +681,100 @@ void BinaryEmitter::emitConstantIslands(BinaryFunction &BF, bool EmitColdPart, SMLoc BinaryEmitter::emitLineInfo(const BinaryFunction &BF, SMLoc NewLoc, SMLoc PrevLoc, bool FirstInstr, MCSymbol *&InstrLabel) { - DWARFUnit *FunctionCU = BF.getDWARFUnit(); - const DWARFDebugLine::LineTable *FunctionLineTable = BF.getDWARFLineTable(); - assert(FunctionCU && "cannot emit line info for function without CU"); - - DebugLineTableRowRef RowReference = DebugLineTableRowRef::fromSMLoc(NewLoc); - - // Check if no new line info needs to be emitted. - if (RowReference == DebugLineTableRowRef::NULL_ROW || + if (NewLoc.getPointer() == nullptr || NewLoc.getPointer() == PrevLoc.getPointer()) return PrevLoc; + const ClusteredRows *Cluster = ClusteredRows::fromSMLoc(NewLoc); + + auto addToLineTable = [&](DebugLineTableRowRef RowReference, + const DWARFUnit &TargetCU, unsigned Flags, + MCSymbol &InstrLabel, + const DWARFDebugLine::Row &CurrentRow) { + const uint64_t TargetUnitIndex = TargetCU.getOffset(); + unsigned TargetFilenum = CurrentRow.File; + const uint32_t CurrentUnitIndex = RowReference.DwCompileUnitIndex; + // If the CU id from the current instruction location does not + // match the target CU id, it means that we have come across some + // inlined code (by BOLT). We must look up the CU for the instruction's + // original function and get the line table from that. + if (TargetUnitIndex != CurrentUnitIndex) { + // Add filename from the inlined function to the current CU. 
+ TargetFilenum = BC.addDebugFilenameToUnit( + TargetUnitIndex, CurrentUnitIndex, CurrentRow.File); + } + BC.Ctx->setCurrentDwarfLoc(TargetFilenum, CurrentRow.Line, + CurrentRow.Column, Flags, CurrentRow.Isa, + CurrentRow.Discriminator); + const MCDwarfLoc &DwarfLoc = BC.Ctx->getCurrentDwarfLoc(); + BC.Ctx->clearDwarfLocSeen(); + const MCLineSection::MCLineDivisionMap &MapLineEntries = + BC.getDwarfLineTable(TargetUnitIndex) + .getMCLineSections() + .getMCLineEntries(); + const auto *It = MapLineEntries.find(Streamer.getCurrentSectionOnly()); + MCDwarfLineEntry NewLineEntry = MCDwarfLineEntry(&InstrLabel, DwarfLoc); + + // Check if line table exists and has entries before doing comparison. + if (It != MapLineEntries.end() && !It->second.empty()) { + // Check if the new line entry has the same debug info as the last one + // to avoid duplicates. We don't compare labels since different + // instructions can have the same line info. + const auto &LastEntry = It->second.back(); + if (LastEntry.getFileNum() == NewLineEntry.getFileNum() && + LastEntry.getLine() == NewLineEntry.getLine() && + LastEntry.getColumn() == NewLineEntry.getColumn() && + LastEntry.getFlags() == NewLineEntry.getFlags() && + LastEntry.getIsa() == NewLineEntry.getIsa() && + LastEntry.getDiscriminator() == NewLineEntry.getDiscriminator()) + return; + } - unsigned CurrentFilenum = 0; - const DWARFDebugLine::LineTable *CurrentLineTable = FunctionLineTable; - - // If the CU id from the current instruction location does not - // match the CU id from the current function, it means that we - // have come across some inlined code. We must look up the CU - // for the instruction's original function and get the line table - // from that. 
- const uint64_t FunctionUnitIndex = FunctionCU->getOffset(); - const uint32_t CurrentUnitIndex = RowReference.DwCompileUnitIndex; - if (CurrentUnitIndex != FunctionUnitIndex) { - CurrentLineTable = BC.DwCtx->getLineTableForUnit( - BC.DwCtx->getCompileUnitForOffset(CurrentUnitIndex)); - // Add filename from the inlined function to the current CU. - CurrentFilenum = BC.addDebugFilenameToUnit( - FunctionUnitIndex, CurrentUnitIndex, - CurrentLineTable->Rows[RowReference.RowIndex - 1].File); - } - - const DWARFDebugLine::Row &CurrentRow = - CurrentLineTable->Rows[RowReference.RowIndex - 1]; - if (!CurrentFilenum) - CurrentFilenum = CurrentRow.File; - - unsigned Flags = (DWARF2_FLAG_IS_STMT * CurrentRow.IsStmt) | - (DWARF2_FLAG_BASIC_BLOCK * CurrentRow.BasicBlock) | - (DWARF2_FLAG_PROLOGUE_END * CurrentRow.PrologueEnd) | - (DWARF2_FLAG_EPILOGUE_BEGIN * CurrentRow.EpilogueBegin); - - // Always emit is_stmt at the beginning of function fragment. - if (FirstInstr) - Flags |= DWARF2_FLAG_IS_STMT; - - BC.Ctx->setCurrentDwarfLoc(CurrentFilenum, CurrentRow.Line, CurrentRow.Column, - Flags, CurrentRow.Isa, CurrentRow.Discriminator); - const MCDwarfLoc &DwarfLoc = BC.Ctx->getCurrentDwarfLoc(); - BC.Ctx->clearDwarfLocSeen(); + BC.getDwarfLineTable(TargetUnitIndex) + .getMCLineSections() + .addLineEntry(NewLineEntry, Streamer.getCurrentSectionOnly()); + }; if (!InstrLabel) InstrLabel = BC.Ctx->createTempSymbol(); - - BC.getDwarfLineTable(FunctionUnitIndex) - .getMCLineSections() - .addLineEntry(MCDwarfLineEntry(InstrLabel, DwarfLoc), - Streamer.getCurrentSectionOnly()); + for (DebugLineTableRowRef RowReference : Cluster->getRows()) { + const DWARFDebugLine::LineTable *CurrentLineTable = + BC.DwCtx->getLineTableForUnit( + BC.DwCtx->getCompileUnitForOffset(RowReference.DwCompileUnitIndex)); + const DWARFDebugLine::Row &CurrentRow = + CurrentLineTable->Rows[RowReference.RowIndex - 1]; + unsigned Flags = (DWARF2_FLAG_IS_STMT * CurrentRow.IsStmt) | + (DWARF2_FLAG_BASIC_BLOCK * 
CurrentRow.BasicBlock) | + (DWARF2_FLAG_PROLOGUE_END * CurrentRow.PrologueEnd) | + (DWARF2_FLAG_EPILOGUE_BEGIN * CurrentRow.EpilogueBegin); + + // Always emit is_stmt at the beginning of function fragment. + if (FirstInstr) + Flags |= DWARF2_FLAG_IS_STMT; + const auto &FunctionDwarfUnits = BF.getDWARFUnits(); + auto It = FunctionDwarfUnits.find(RowReference.DwCompileUnitIndex); + if (It != FunctionDwarfUnits.end()) { + addToLineTable(RowReference, *It->second, Flags, *InstrLabel, CurrentRow); + continue; + } + // This row is from a CU that did not contain the original function. + // This might happen if BOLT moved/inlined that instruction from other CUs. + // In this case, we need to insert it into all CUs that the function + // originally belonged to. + for (const auto &[_, Unit] : BF.getDWARFUnits()) { + addToLineTable(RowReference, *Unit, Flags, *InstrLabel, CurrentRow); + } + } return NewLoc; } void BinaryEmitter::emitLineInfoEnd(const BinaryFunction &BF, - MCSymbol *FunctionEndLabel) { - DWARFUnit *FunctionCU = BF.getDWARFUnit(); - assert(FunctionCU && "DWARF unit expected"); + MCSymbol *FunctionEndLabel, + const DWARFUnit &Unit) { BC.Ctx->setCurrentDwarfLoc(0, 0, 0, DWARF2_FLAG_END_SEQUENCE, 0, 0); const MCDwarfLoc &DwarfLoc = BC.Ctx->getCurrentDwarfLoc(); BC.Ctx->clearDwarfLocSeen(); - BC.getDwarfLineTable(FunctionCU->getOffset()) + BC.getDwarfLineTable(Unit.getOffset()) .getMCLineSections() .addLineEntry(MCDwarfLineEntry(FunctionEndLabel, DwarfLoc), Streamer.getCurrentSectionOnly()); @@ -1115,36 +1143,40 @@ void BinaryEmitter::emitDebugLineInfoForOriginalFunctions() { if (Function.isEmitted()) continue; - const DWARFDebugLine::LineTable *LineTable = Function.getDWARFLineTable(); - if (!LineTable) - continue; // nothing to update for this function + // Loop through all CUs in the function + for (const auto &[_, Unit] : Function.getDWARFUnits()) { + const DWARFDebugLine::LineTable *LineTable = + Function.getDWARFLineTableForUnit(Unit); + if (!LineTable) +
continue; // nothing to update for this unit + + const uint64_t Address = Function.getAddress(); + std::vector Results; + if (!LineTable->lookupAddressRange( + {Address, object::SectionedAddress::UndefSection}, + Function.getSize(), Results)) + continue; - const uint64_t Address = Function.getAddress(); - std::vector Results; - if (!LineTable->lookupAddressRange( - {Address, object::SectionedAddress::UndefSection}, - Function.getSize(), Results)) - continue; + if (Results.empty()) + continue; - if (Results.empty()) - continue; + // The first row returned could be the last row matching the start + // address. Find the first row with the same address that is not the end + // of the sequence. + uint64_t FirstRow = Results.front(); + while (FirstRow > 0) { + const DWARFDebugLine::Row &PrevRow = LineTable->Rows[FirstRow - 1]; + if (PrevRow.Address.Address != Address || PrevRow.EndSequence) + break; + --FirstRow; + } - // The first row returned could be the last row matching the start address. - // Find the first row with the same address that is not the end of the - // sequence. 
- uint64_t FirstRow = Results.front(); - while (FirstRow > 0) { - const DWARFDebugLine::Row &PrevRow = LineTable->Rows[FirstRow - 1]; - if (PrevRow.Address.Address != Address || PrevRow.EndSequence) - break; - --FirstRow; + const uint64_t EndOfSequenceAddress = + Function.getAddress() + Function.getMaxSize(); + BC.getDwarfLineTable(Unit->getOffset()) + .addLineTableSequence(LineTable, FirstRow, Results.back(), + EndOfSequenceAddress); } - - const uint64_t EndOfSequenceAddress = - Function.getAddress() + Function.getMaxSize(); - BC.getDwarfLineTable(Function.getDWARFUnit()->getOffset()) - .addLineTableSequence(LineTable, FirstRow, Results.back(), - EndOfSequenceAddress); } // For units that are completely unprocessed, use original debug line contents diff --git a/bolt/lib/Core/BinaryFunction.cpp b/bolt/lib/Core/BinaryFunction.cpp index 6cac2d0cca2cb..578a87dc6c09d 100644 --- a/bolt/lib/Core/BinaryFunction.cpp +++ b/bolt/lib/Core/BinaryFunction.cpp @@ -179,37 +179,29 @@ template static bool emptyRange(const R &Range) { } /// Gets debug line information for the instruction located at the given -/// address in the original binary. The SMLoc's pointer is used -/// to point to this information, which is represented by a -/// DebugLineTableRowRef. The returned pointer is null if no debug line -/// information for this instruction was found. -static SMLoc findDebugLineInformationForInstructionAt( +/// address in the original binary. Returns an optional DebugLineTableRowRef +/// that references the corresponding row in the DWARF line table. Since binary +/// functions can span multiple compilation units, this function helps +/// associate instructions with their debug line information from the +/// appropriate CU. Returns std::nullopt if no debug line information for +/// this instruction was found. 
+static std::optional +findDebugLineInformationForInstructionAt( uint64_t Address, DWARFUnit *Unit, const DWARFDebugLine::LineTable *LineTable) { - // We use the pointer in SMLoc to store an instance of DebugLineTableRowRef, - // which occupies 64 bits. Thus, we can only proceed if the struct fits into - // the pointer itself. - static_assert( - sizeof(decltype(SMLoc().getPointer())) >= sizeof(DebugLineTableRowRef), - "Cannot fit instruction debug line information into SMLoc's pointer"); - - SMLoc NullResult = DebugLineTableRowRef::NULL_ROW.toSMLoc(); uint32_t RowIndex = LineTable->lookupAddress( {Address, object::SectionedAddress::UndefSection}); if (RowIndex == LineTable->UnknownRowIndex) - return NullResult; + return std::nullopt; assert(RowIndex < LineTable->Rows.size() && "Line Table lookup returned invalid index."); - decltype(SMLoc().getPointer()) Ptr; - DebugLineTableRowRef *InstructionLocation = - reinterpret_cast(&Ptr); - - InstructionLocation->DwCompileUnitIndex = Unit->getOffset(); - InstructionLocation->RowIndex = RowIndex + 1; + DebugLineTableRowRef InstructionLocation; + InstructionLocation.DwCompileUnitIndex = Unit->getOffset(); + InstructionLocation.RowIndex = RowIndex + 1; - return SMLoc::getFromPointer(Ptr); + return InstructionLocation; } static std::string buildSectionName(StringRef Prefix, StringRef Name, @@ -1496,9 +1488,24 @@ Error BinaryFunction::disassemble() { } add_instruction: - if (getDWARFLineTable()) { - Instruction.setLoc(findDebugLineInformationForInstructionAt( - AbsoluteInstrAddr, getDWARFUnit(), getDWARFLineTable())); + if (!getDWARFUnits().empty()) { + SmallVector Rows; + for (const auto &[_, Unit] : getDWARFUnits()) { + const DWARFDebugLine::LineTable *LineTable = + getDWARFLineTableForUnit(Unit); + if (!LineTable) + continue; + if (std::optional RowRef = + findDebugLineInformationForInstructionAt(AbsoluteInstrAddr, + Unit, LineTable)) + Rows.emplace_back(*RowRef); + } + if (!Rows.empty()) { + ClusteredRows *Cluster = + 
BC.ClusteredRows.createClusteredRows(Rows.size()); + Cluster->populate(Rows); + Instruction.setLoc(Cluster->toSMLoc()); + } } // Record offset of the instruction for profile matching. diff --git a/bolt/lib/Core/DebugData.cpp b/bolt/lib/Core/DebugData.cpp index 521eb8d91bbc0..e05f28f08572c 100644 --- a/bolt/lib/Core/DebugData.cpp +++ b/bolt/lib/Core/DebugData.cpp @@ -101,8 +101,6 @@ std::optional findAttributeInfo(const DWARFDie DIE, return findAttributeInfo(DIE, AbbrevDecl, *Index); } -const DebugLineTableRowRef DebugLineTableRowRef::NULL_ROW{0, 0}; - LLVM_ATTRIBUTE_UNUSED static void printLE64(const std::string &S) { for (uint32_t I = 0, Size = S.size(); I < Size; ++I) { diff --git a/bolt/test/X86/multi-cu-debug-line.s b/bolt/test/X86/multi-cu-debug-line.s new file mode 100644 index 0000000000000..15f49a211e58b --- /dev/null +++ b/bolt/test/X86/multi-cu-debug-line.s @@ -0,0 +1,327 @@ +## Test that BOLT correctly handles debug line information for functions +## that belong to multiple compilation units (e.g., inline functions in +## common header files). This is the assembly version of the multi-cu-debug-line.test. +## The test covers two scenarios: +## 1. Normal processing: .debug_line section shows lines for the function +## in all CUs where it was compiled, with no duplicate rows within CUs +## 2. 
Functions not processed: When BOLT doesn't process functions (using +## --funcs with nonexistent function), original debug info is preserved + +# REQUIRES: system-linux + +# RUN: split-file %s %t +# RUN: llvm-mc -filetype=obj -triple x86_64-unknown-unknown %t/multi-cu-file1.s -o %t/multi-cu-file1.o +# RUN: llvm-mc -filetype=obj -triple x86_64-unknown-unknown %t/multi-cu-file2.s -o %t/multi-cu-file2.o +# RUN: %clang %cflags %t/multi-cu-file1.o %t/multi-cu-file2.o -o %t.exe -Wl,-q + +## Test 1: Normal BOLT processing (functions are processed/optimized) +# RUN: llvm-bolt %t.exe -o %t.bolt --update-debug-sections +# RUN: llvm-dwarfdump --debug-line %t.bolt > %t.debug-line.txt +# RUN: FileCheck %s --check-prefix=BASIC --input-file %t.debug-line.txt + +## Check that debug line information is present for both compilation units +# BASIC: debug_line[{{.*}}] +# BASIC: file_names[{{.*}}]: +# BASIC: name: "{{.*}}multi-cu-file1.c" +# BASIC: debug_line[{{.*}}] +# BASIC: file_names[{{.*}}]: +# BASIC: name: "{{.*}}multi-cu-file2.c" + +## Use our helper script to create a normalized table without addresses +# RUN: process-debug-line %t.debug-line.txt > %t.normalized-debug-line.txt +# RUN: FileCheck %s --check-prefix=NORMALIZED --input-file %t.normalized-debug-line.txt + +## Check that we have line entries for the inline function (lines 5, 6, 7) from multi-cu-common.h +## in both compilation units +# NORMALIZED: multi-cu-file1.c 5 {{[0-9]+}} multi-cu-common.h +# NORMALIZED: multi-cu-file1.c 6 {{[0-9]+}} multi-cu-common.h +# NORMALIZED: multi-cu-file1.c 7 {{[0-9]+}} multi-cu-common.h +# NORMALIZED: multi-cu-file2.c 5 {{[0-9]+}} multi-cu-common.h +# NORMALIZED: multi-cu-file2.c 6 {{[0-9]+}} multi-cu-common.h +# NORMALIZED: multi-cu-file2.c 7 {{[0-9]+}} multi-cu-common.h + +## Verify that we have line entries for the inline function in multiple CUs +## by checking that the header file appears multiple times in different contexts +# RUN: grep -c "multi-cu-common.h" %t.debug-line.txt > 
%t.header-count.txt +# RUN: FileCheck %s --check-prefix=MULTI-CU --input-file %t.header-count.txt + +## The header should appear in debug line info for multiple CUs +# MULTI-CU: {{[2-9]|[1-9][0-9]+}} + +## Check that there are no duplicate line table rows within the same CU +## This verifies the fix for the bug where duplicate entries were created +# RUN: sort %t.normalized-debug-line.txt | uniq -c | \ +# RUN: awk '$1 > 1 {print "DUPLICATE_ROW: " $0}' > %t.duplicates.txt +# RUN: FileCheck %s --check-prefix=NO-DUPLICATES --input-file %t.duplicates.txt --allow-empty + +## Should have no duplicate normalized rows (file should be empty) +## Note: Cross-CU duplicates are expected and valid (same function in different CUs) +## but within-CU duplicates would indicate a bug +# NO-DUPLICATES-NOT: DUPLICATE_ROW + +## Test 2: Functions not processed by BOLT (using --funcs with nonexistent function) +## This tests the code path where BOLT preserves original debug info +# RUN: llvm-bolt %t.exe -o %t.not-emitted.bolt --update-debug-sections --funcs=nonexistent_function +# RUN: llvm-dwarfdump --debug-line %t.not-emitted.bolt > %t.not-emitted.debug-line.txt +# RUN: FileCheck %s --check-prefix=PRESERVED-BASIC --input-file %t.not-emitted.debug-line.txt + +## Check that debug line information is still present for both compilation units when functions aren't processed +# PRESERVED-BASIC: debug_line[{{.*}}] +# PRESERVED-BASIC: file_names[{{.*}}]: +# PRESERVED-BASIC: name: "{{.*}}multi-cu-file1.c" +# PRESERVED-BASIC: debug_line[{{.*}}] +# PRESERVED-BASIC: file_names[{{.*}}]: +# PRESERVED-BASIC: name: "{{.*}}multi-cu-file2.c" + +## Create normalized output for the not-emitted case +# RUN: process-debug-line %t.not-emitted.debug-line.txt > %t.not-emitted.normalized.txt +# RUN: FileCheck %s --check-prefix=PRESERVED-NORMALIZED --input-file %t.not-emitted.normalized.txt + +## Check that we have line entries for the inline function (lines 5, 6, 7) from multi-cu-common.h +## in both 
compilation units (preserved from original) +# PRESERVED-NORMALIZED: multi-cu-file1.c 5 {{[0-9]+}} multi-cu-common.h +# PRESERVED-NORMALIZED: multi-cu-file1.c 6 {{[0-9]+}} multi-cu-common.h +# PRESERVED-NORMALIZED: multi-cu-file1.c 7 {{[0-9]+}} multi-cu-common.h +# PRESERVED-NORMALIZED: multi-cu-file2.c 5 {{[0-9]+}} multi-cu-common.h +# PRESERVED-NORMALIZED: multi-cu-file2.c 6 {{[0-9]+}} multi-cu-common.h +# PRESERVED-NORMALIZED: multi-cu-file2.c 7 {{[0-9]+}} multi-cu-common.h + +## Verify that we have line entries for the inline function in multiple CUs (preserved) +## by checking that the header file appears multiple times in different contexts +# RUN: grep -c "multi-cu-common.h" %t.not-emitted.debug-line.txt > %t.preserved-header-count.txt +# RUN: FileCheck %s --check-prefix=PRESERVED-MULTI-CU --input-file %t.preserved-header-count.txt + +## The header should appear in debug line info for multiple CUs (preserved from original) +# PRESERVED-MULTI-CU: {{[2-9]|[1-9][0-9]+}} + +## Check that original debug info is preserved for main functions +# RUN: grep "multi-cu-file1.c.*multi-cu-file1.c" %t.not-emitted.normalized.txt > %t.preserved-main.txt +# RUN: FileCheck %s --check-prefix=PRESERVED-MAIN --input-file %t.preserved-main.txt + +# PRESERVED-MAIN: multi-cu-file1.c {{[0-9]+}} {{[0-9]+}} multi-cu-file1.c + +## Check that original debug info is preserved for file2 functions +# RUN: grep "multi-cu-file2.c.*multi-cu-file2.c" %t.not-emitted.normalized.txt > %t.preserved-file2.txt +# RUN: FileCheck %s --check-prefix=PRESERVED-FILE2 --input-file %t.preserved-file2.txt + +# PRESERVED-FILE2: multi-cu-file2.c {{[0-9]+}} {{[0-9]+}} multi-cu-file2.c + +;--- multi-cu-file1.s + .text + .file 1 "/repo/llvm-project" "bolt/test/Inputs/multi-cu-file1.c" + .file 2 "/repo/llvm-project" "bolt/test/Inputs/multi-cu-common.h" + + .globl main + .type main,@function +main: +.Lfunc_begin0: + .loc 1 4 0 + callq common_inline_function + .loc 1 8 0 + retq +.Lfunc_end0: + .size main, 
.Lfunc_end0-main + + .type common_inline_function,@function +common_inline_function: +.Lfunc_begin1: + .loc 2 5 0 + movl $42, %eax + .loc 2 6 0 + addl $10, %eax + .loc 2 7 0 + retq +.Lfunc_end1: + .size common_inline_function, .Lfunc_end1-common_inline_function + + .section .debug_abbrev,"",@progbits + .byte 1 # Abbreviation Code + .byte 17 # DW_TAG_compile_unit + .byte 1 # DW_CHILDREN_yes + .byte 37 # DW_AT_producer + .byte 14 # DW_FORM_strp + .byte 19 # DW_AT_language + .byte 5 # DW_FORM_data2 + .byte 3 # DW_AT_name + .byte 14 # DW_FORM_strp + .byte 16 # DW_AT_stmt_list + .byte 23 # DW_FORM_sec_offset + .byte 27 # DW_AT_comp_dir + .byte 14 # DW_FORM_strp + .byte 17 # DW_AT_low_pc + .byte 1 # DW_FORM_addr + .byte 18 # DW_AT_high_pc + .byte 6 # DW_FORM_data4 + .byte 0 # EOM(1) + .byte 0 # EOM(2) + .byte 2 # Abbreviation Code + .byte 46 # DW_TAG_subprogram + .byte 0 # DW_CHILDREN_no + .byte 17 # DW_AT_low_pc + .byte 1 # DW_FORM_addr + .byte 18 # DW_AT_high_pc + .byte 6 # DW_FORM_data4 + .byte 3 # DW_AT_name + .byte 14 # DW_FORM_strp + .byte 58 # DW_AT_decl_file + .byte 11 # DW_FORM_data1 + .byte 59 # DW_AT_decl_line + .byte 11 # DW_FORM_data1 + .byte 0 # EOM(1) + .byte 0 # EOM(2) + .byte 0 # EOM(3) + + .section .debug_info,"",@progbits +.Lcu_begin0: + .long .Ldebug_info_end0-.Ldebug_info_start0 +.Ldebug_info_start0: + .short 4 # DWARF version number + .long .debug_abbrev # Offset Into Abbrev. 
Section + .byte 8 # Address Size (in bytes) + .byte 1 # Abbrev [1] 0xb:0x30 DW_TAG_compile_unit + .long .Linfo_string0 # DW_AT_producer + .short 29 # DW_AT_language + .long .Linfo_string1 # DW_AT_name + .long .Lline_table_start0 # DW_AT_stmt_list + .long .Linfo_string2 # DW_AT_comp_dir + .quad .Lfunc_begin0 # DW_AT_low_pc + .long .Lfunc_end1-.Lfunc_begin0 # DW_AT_high_pc + .byte 2 # Abbrev [2] 0x2a:0x10 DW_TAG_subprogram + .quad .Lfunc_begin0 # DW_AT_low_pc + .long .Lfunc_end0-.Lfunc_begin0 # DW_AT_high_pc + .long .Linfo_string3 # DW_AT_name + .byte 1 # DW_AT_decl_file + .byte 4 # DW_AT_decl_line + .byte 2 # Abbrev [2] 0x3a:0x10 DW_TAG_subprogram + .quad .Lfunc_begin1 # DW_AT_low_pc + .long .Lfunc_end1-.Lfunc_begin1 # DW_AT_high_pc + .long .Linfo_string4 # DW_AT_name + .byte 2 # DW_AT_decl_file + .byte 5 # DW_AT_decl_line + .byte 0 # End Of Children Mark +.Ldebug_info_end0: + + .section .debug_str,"MS",@progbits,1 +.Linfo_string0: + .asciz "clang version 18.0.0" +.Linfo_string1: + .asciz "/repo/llvm-project/bolt/test/Inputs/multi-cu-file1.c" +.Linfo_string2: + .asciz "/repo/llvm-project" +.Linfo_string3: + .asciz "main" +.Linfo_string4: + .asciz "common_inline_function" + + .section .debug_line,"",@progbits +.Lline_table_start0: + +;--- multi-cu-file2.s + .text + .file 1 "/repo/llvm-project" "bolt/test/Inputs/multi-cu-file2.c" + .file 2 "/repo/llvm-project" "bolt/test/Inputs/multi-cu-common.h" + + .globl helper_function + .type helper_function,@function +helper_function: +.Lfunc_begin0: + .loc 1 4 0 + callq common_inline_function + .loc 1 8 0 + retq +.Lfunc_end0: + .size helper_function, .Lfunc_end0-helper_function + + .type common_inline_function,@function +common_inline_function: +.Lfunc_begin1: + .loc 2 5 0 + movl $42, %eax + .loc 2 6 0 + addl $10, %eax + .loc 2 7 0 + retq +.Lfunc_end1: + .size common_inline_function, .Lfunc_end1-common_inline_function + + .section .debug_abbrev,"",@progbits + .byte 1 # Abbreviation Code + .byte 17 # DW_TAG_compile_unit + .byte 
1 # DW_CHILDREN_yes + .byte 37 # DW_AT_producer + .byte 14 # DW_FORM_strp + .byte 19 # DW_AT_language + .byte 5 # DW_FORM_data2 + .byte 3 # DW_AT_name + .byte 14 # DW_FORM_strp + .byte 16 # DW_AT_stmt_list + .byte 23 # DW_FORM_sec_offset + .byte 27 # DW_AT_comp_dir + .byte 14 # DW_FORM_strp + .byte 17 # DW_AT_low_pc + .byte 1 # DW_FORM_addr + .byte 18 # DW_AT_high_pc + .byte 6 # DW_FORM_data4 + .byte 0 # EOM(1) + .byte 0 # EOM(2) + .byte 2 # Abbreviation Code + .byte 46 # DW_TAG_subprogram + .byte 0 # DW_CHILDREN_no + .byte 17 # DW_AT_low_pc + .byte 1 # DW_FORM_addr + .byte 18 # DW_AT_high_pc + .byte 6 # DW_FORM_data4 + .byte 3 # DW_AT_name + .byte 14 # DW_FORM_strp + .byte 58 # DW_AT_decl_file + .byte 11 # DW_FORM_data1 + .byte 59 # DW_AT_decl_line + .byte 11 # DW_FORM_data1 + .byte 0 # EOM(1) + .byte 0 # EOM(2) + .byte 0 # EOM(3) + + .section .debug_info,"",@progbits +.Lcu_begin0: + .long .Ldebug_info_end0-.Ldebug_info_start0 +.Ldebug_info_start0: + .short 4 # DWARF version number + .long .debug_abbrev # Offset Into Abbrev. 
Section + .byte 8 # Address Size (in bytes) + .byte 1 # Abbrev [1] 0xb:0x30 DW_TAG_compile_unit + .long .Linfo_string0 # DW_AT_producer + .short 29 # DW_AT_language + .long .Linfo_string1 # DW_AT_name + .long .Lline_table_start0 # DW_AT_stmt_list + .long .Linfo_string2 # DW_AT_comp_dir + .quad .Lfunc_begin0 # DW_AT_low_pc + .long .Lfunc_end1-.Lfunc_begin0 # DW_AT_high_pc + .byte 2 # Abbrev [2] 0x2a:0x10 DW_TAG_subprogram + .quad .Lfunc_begin0 # DW_AT_low_pc + .long .Lfunc_end0-.Lfunc_begin0 # DW_AT_high_pc + .long .Linfo_string3 # DW_AT_name + .byte 1 # DW_AT_decl_file + .byte 4 # DW_AT_decl_line + .byte 2 # Abbrev [2] 0x3a:0x10 DW_TAG_subprogram + .quad .Lfunc_begin1 # DW_AT_low_pc + .long .Lfunc_end1-.Lfunc_begin1 # DW_AT_high_pc + .long .Linfo_string4 # DW_AT_name + .byte 2 # DW_AT_decl_file + .byte 5 # DW_AT_decl_line + .byte 0 # End Of Children Mark +.Ldebug_info_end0: + + .section .debug_str,"MS",@progbits,1 +.Linfo_string0: + .asciz "clang version 18.0.0" +.Linfo_string1: + .asciz "/repo/llvm-project/bolt/test/Inputs/multi-cu-file2.c" +.Linfo_string2: + .asciz "/repo/llvm-project" +.Linfo_string3: + .asciz "helper_function" +.Linfo_string4: + .asciz "common_inline_function" + + .section .debug_line,"",@progbits +.Lline_table_start0: diff --git a/bolt/test/lit.cfg.py b/bolt/test/lit.cfg.py index bef570ba50a04..3299051db4983 100644 --- a/bolt/test/lit.cfg.py +++ b/bolt/test/lit.cfg.py @@ -138,6 +138,7 @@ unresolved="fatal", extra_args=[link_fdata_cmd], ), + ToolSubst("process-debug-line", unresolved="fatal"), ToolSubst("merge-fdata", unresolved="fatal"), ToolSubst("llvm-readobj", unresolved="fatal"), ToolSubst("llvm-dwp", unresolved="fatal"), diff --git a/bolt/test/process-debug-line b/bolt/test/process-debug-line new file mode 100755 index 0000000000000..44cbcd1e5984a --- /dev/null +++ b/bolt/test/process-debug-line @@ -0,0 +1,105 @@ +#!/bin/sh + +# Script to process llvm-dwarfdump --debug-line output and create a normalized table +# Usage: 
process-debug-line.sh +# +# Output format: CU_FILE LINE COLUMN FILE_NAME [additional_info] +# This strips addresses to make rows unique and adds context about which CU and file each line belongs to + +if [ $# -ne 1 ]; then + echo "Usage: $0 " >&2 + exit 1 +fi + +debug_line_file="$1" + +if [ ! -f "$debug_line_file" ]; then + echo "Error: File '$debug_line_file' not found" >&2 + exit 1 +fi + +awk ' +BEGIN { + cu_count = 0 + current_cu_file = "" + # Initialize file names array + for (i = 0; i < 100; i++) { + current_file_names[i] = "" + } +} + +# Track debug_line sections (new CU) +/^debug_line\[/ { + cu_count++ + current_cu_file = "" + # Clear file names array for new CU + for (i = 0; i < 100; i++) { + current_file_names[i] = "" + } + next +} + +# Capture file names and their indices +/^file_names\[.*\]:/ { + # Extract file index using simple string operations + line_copy = $0 + gsub(/file_names\[/, "", line_copy) + gsub(/\]:.*/, "", line_copy) + gsub(/[ \t]/, "", line_copy) + file_index = line_copy + + getline # Read the next line which contains the actual filename + # Extract filename from name: "filename" format + if (match($0, /name:[ \t]*"/)) { + filename = $0 + gsub(/.*name:[ \t]*"/, "", filename) + gsub(/".*/, "", filename) + current_file_names[file_index] = filename + + # Extract basename for main CU file (first .c/.cpp/.cc file we see) + if (current_cu_file == "" && match(filename, /\.(c|cpp|cc)$/)) { + cu_filename = filename + gsub(/.*\//, "", cu_filename) + current_cu_file = cu_filename + } + } + next +} + +# Process line table entries +/^0x[0-9a-f]+/ { + # Parse the line entry: Address Line Column File ISA Discriminator OpIndex Flags + if (NF >= 4) { + line = $2 + column = $3 + file_index = $4 + + # Get the filename for this file index + filename = current_file_names[file_index] + if (filename == "") { + filename = "UNKNOWN_FILE_" file_index + } else { + # Extract just the basename + basename = filename + gsub(/.*\//, "", basename) + filename = basename + 
} + + # Build additional info (flags, etc.) + additional_info = "" + for (i = 8; i <= NF; i++) { + if (additional_info != "") { + additional_info = additional_info " " + } + additional_info = additional_info $i + } + + # Output normalized row: CU_FILE LINE COLUMN FILE_NAME [additional_info] + printf "%s %s %s %s", current_cu_file, line, column, filename + if (additional_info != "") { + printf " %s", additional_info + } + printf "\n" + } +} +' "$debug_line_file" diff --git a/bolt/unittests/Core/CMakeLists.txt b/bolt/unittests/Core/CMakeLists.txt index f10b0d9472067..297dec7449202 100644 --- a/bolt/unittests/Core/CMakeLists.txt +++ b/bolt/unittests/Core/CMakeLists.txt @@ -7,6 +7,7 @@ set(LLVM_LINK_COMPONENTS add_bolt_unittest(CoreTests BinaryContext.cpp + ClusteredRows.cpp MCPlusBuilder.cpp MemoryMaps.cpp DynoStats.cpp diff --git a/bolt/unittests/Core/ClusteredRows.cpp b/bolt/unittests/Core/ClusteredRows.cpp new file mode 100644 index 0000000000000..4665022c91fdd --- /dev/null +++ b/bolt/unittests/Core/ClusteredRows.cpp @@ -0,0 +1,141 @@ +//===- bolt/unittest/Core/ClusteredRows.cpp ------------------------------===// +// +// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. +// See https://llvm.org/LICENSE.txt for license information. 
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception +// +//===----------------------------------------------------------------------===// + +#include "bolt/Core/DebugData.h" +#include "llvm/Support/SMLoc.h" +#include "gtest/gtest.h" +#include + +using namespace llvm; +using namespace llvm::bolt; + +namespace { + +class ClusteredRowsTest : public ::testing::Test { +protected: + void SetUp() override { + Container = std::make_unique(); + } + + std::unique_ptr Container; +}; + +TEST_F(ClusteredRowsTest, CreateSingleElement) { + ClusteredRows *CR = Container->createClusteredRows(1); + ASSERT_NE(CR, nullptr); + EXPECT_EQ(CR->size(), 1u); + + // Test population with single element + std::vector TestRefs = {{42, 100}}; + CR->populate(TestRefs); + + ArrayRef Rows = CR->getRows(); + EXPECT_EQ(Rows.size(), 1u); + EXPECT_EQ(Rows[0].DwCompileUnitIndex, 42u); + EXPECT_EQ(Rows[0].RowIndex, 100u); +} + +TEST_F(ClusteredRowsTest, CreateMultipleElements) { + ClusteredRows *CR = Container->createClusteredRows(3); + ASSERT_NE(CR, nullptr); + EXPECT_EQ(CR->size(), 3u); + + // Test population with multiple elements + std::vector TestRefs = {{10, 20}, {30, 40}, {50, 60}}; + CR->populate(TestRefs); + + ArrayRef Rows = CR->getRows(); + EXPECT_EQ(Rows.size(), 3u); + + EXPECT_EQ(Rows[0].DwCompileUnitIndex, 10u); + EXPECT_EQ(Rows[0].RowIndex, 20u); + + EXPECT_EQ(Rows[1].DwCompileUnitIndex, 30u); + EXPECT_EQ(Rows[1].RowIndex, 40u); + + EXPECT_EQ(Rows[2].DwCompileUnitIndex, 50u); + EXPECT_EQ(Rows[2].RowIndex, 60u); +} + +TEST_F(ClusteredRowsTest, SMLoc_Conversion) { + ClusteredRows *CR = Container->createClusteredRows(2); + ASSERT_NE(CR, nullptr); + + // Test SMLoc conversion + SMLoc Loc = CR->toSMLoc(); + EXPECT_TRUE(Loc.isValid()); + + // Test round-trip conversion + const ClusteredRows *CR2 = ClusteredRows::fromSMLoc(Loc); + EXPECT_EQ(CR, CR2); + EXPECT_EQ(CR2->size(), 2u); +} + +TEST_F(ClusteredRowsTest, PopulateWithArrayRef) { + ClusteredRows *CR = 
Container->createClusteredRows(4); + ASSERT_NE(CR, nullptr); + + // Test population with ArrayRef + DebugLineTableRowRef TestArray[] = {{1, 2}, {3, 4}, {5, 6}, {7, 8}}; + ArrayRef TestRefs(TestArray, 4); + CR->populate(TestRefs); + + ArrayRef Rows = CR->getRows(); + EXPECT_EQ(Rows.size(), 4u); + + for (size_t i = 0; i < 4; ++i) { + EXPECT_EQ(Rows[i].DwCompileUnitIndex, TestArray[i].DwCompileUnitIndex); + EXPECT_EQ(Rows[i].RowIndex, TestArray[i].RowIndex); + } +} + +TEST_F(ClusteredRowsTest, MultipleClusteredRows) { + // Test creating multiple ClusteredRows objects + ClusteredRows *CR1 = Container->createClusteredRows(2); + ClusteredRows *CR2 = Container->createClusteredRows(3); + ClusteredRows *CR3 = Container->createClusteredRows(1); + + ASSERT_NE(CR1, nullptr); + ASSERT_NE(CR2, nullptr); + ASSERT_NE(CR3, nullptr); + + // Ensure they are different objects + EXPECT_NE(CR1, CR2); + EXPECT_NE(CR2, CR3); + EXPECT_NE(CR1, CR3); + + // Verify sizes + EXPECT_EQ(CR1->size(), 2u); + EXPECT_EQ(CR2->size(), 3u); + EXPECT_EQ(CR3->size(), 1u); + + // Populate each with different data + std::vector TestRefs1 = {{100, 200}, {300, 400}}; + std::vector TestRefs2 = {{10, 20}, {30, 40}, {50, 60}}; + std::vector TestRefs3 = {{999, 888}}; + + CR1->populate(TestRefs1); + CR2->populate(TestRefs2); + CR3->populate(TestRefs3); + + // Verify data integrity + ArrayRef Rows1 = CR1->getRows(); + ArrayRef Rows2 = CR2->getRows(); + ArrayRef Rows3 = CR3->getRows(); + + EXPECT_EQ(Rows1[0].DwCompileUnitIndex, 100u); + EXPECT_EQ(Rows1[1].RowIndex, 400u); + + EXPECT_EQ(Rows2[1].DwCompileUnitIndex, 30u); + EXPECT_EQ(Rows2[2].RowIndex, 60u); + + EXPECT_EQ(Rows3[0].DwCompileUnitIndex, 999u); + EXPECT_EQ(Rows3[0].RowIndex, 888u); +} + +} // namespace