Skip to content
Merged
Show file tree
Hide file tree
Changes from 5 commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
39 changes: 34 additions & 5 deletions llvm/include/llvm/DebugInfo/DWARF/DWARFDebugLine.h
Original file line number Diff line number Diff line change
Expand Up @@ -209,6 +209,9 @@ class DWARFDebugLine {
unsigned LastRowIndex;
bool Empty;

/// The offset into the line table where this sequence begins; UINT64_MAX
/// until it is set via SetSequenceOffset().
uint64_t StmtSeqOffset = UINT64_MAX;

void reset();

static bool orderByHighPC(const Sequence &LHS, const Sequence &RHS) {
Expand All @@ -224,6 +227,8 @@ class DWARFDebugLine {
return SectionIndex == PC.SectionIndex &&
(LowPC <= PC.Address && PC.Address < HighPC);
}

/// Records \p Offset as the line-table offset at which this sequence starts.
void SetSequenceOffset(uint64_t Offset) {
  StmtSeqOffset = Offset;
}
};

struct LineTable {
Expand All @@ -243,8 +248,20 @@ class DWARFDebugLine {
uint32_t lookupAddress(object::SectionedAddress Address,
bool *IsApproximateLine = nullptr) const;

bool lookupAddressRange(object::SectionedAddress Address, uint64_t Size,
std::vector<uint32_t> &Result) const;
/// Fills the Result argument with the indices of the rows that correspond
/// to the address range specified by \p Address and \p Size.
///
/// \param Address - The starting address of the range.
/// \param Size - The size of the address range.
/// \param Result - The vector to fill with row indices.
/// \param StmtSequenceOffset - if provided, only rows from the sequence
/// starting at the matching offset will be added to the result.
///
/// Returns true if any rows were found.
bool lookupAddressRange(
object::SectionedAddress Address, uint64_t Size,
std::vector<uint32_t> &Result,
std::optional<uint64_t> StmtSequenceOffset = std::nullopt) const;

bool hasFileAtIndex(uint64_t FileIndex) const {
return Prologue.hasFileAtIndex(FileIndex);
Expand Down Expand Up @@ -305,8 +322,20 @@ class DWARFDebugLine {
uint32_t lookupAddressImpl(object::SectionedAddress Address,
bool *IsApproximateLine = nullptr) const;

bool lookupAddressRangeImpl(object::SectionedAddress Address, uint64_t Size,
std::vector<uint32_t> &Result) const;
/// Fills the Result argument with the indices of the rows that correspond
/// to the address range specified by \p Address and \p Size.
///
/// \param Address - The starting address of the range.
/// \param Size - The size of the address range.
/// \param Result - The vector to fill with row indices.
/// \param StmtSequenceOffset - if provided, only rows from the sequence
/// starting at the matching offset will be added to the result.
///
/// Returns true if any rows were found.
bool
lookupAddressRangeImpl(object::SectionedAddress Address, uint64_t Size,
std::vector<uint32_t> &Result,
std::optional<uint64_t> StmtSequenceOffset) const;
};

const LineTable *getLineTable(uint64_t Offset) const;
Expand Down Expand Up @@ -376,7 +405,7 @@ class DWARFDebugLine {
ParsingState(struct LineTable *LT, uint64_t TableOffset,
function_ref<void(Error)> ErrorHandler);

void resetRowAndSequence();
void resetRowAndSequence(uint64_t Offset);
void appendRowToMatrix();

struct AddrOpIndexDelta {
Expand Down
36 changes: 26 additions & 10 deletions llvm/lib/DebugInfo/DWARF/DWARFDebugLine.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -531,6 +531,7 @@ void DWARFDebugLine::Sequence::reset() {
FirstRowIndex = 0;
LastRowIndex = 0;
Empty = true;
StmtSeqOffset = UINT64_MAX;
}

/// Default constructor: all member initialization is delegated to clear().
DWARFDebugLine::LineTable::LineTable() {
  clear();
}
Expand Down Expand Up @@ -561,13 +562,12 @@ void DWARFDebugLine::LineTable::clear() {
DWARFDebugLine::ParsingState::ParsingState(
struct LineTable *LT, uint64_t TableOffset,
function_ref<void(Error)> ErrorHandler)
: LineTable(LT), LineTableOffset(TableOffset), ErrorHandler(ErrorHandler) {
resetRowAndSequence();
}
: LineTable(LT), LineTableOffset(TableOffset), ErrorHandler(ErrorHandler) {}

void DWARFDebugLine::ParsingState::resetRowAndSequence() {
void DWARFDebugLine::ParsingState::resetRowAndSequence(uint64_t Offset) {
Row.reset(LineTable->Prologue.DefaultIsStmt);
Sequence.reset();
Sequence.SetSequenceOffset(Offset);
}

void DWARFDebugLine::ParsingState::appendRowToMatrix() {
Expand Down Expand Up @@ -848,6 +848,10 @@ Error DWARFDebugLine::LineTable::parse(
*OS << '\n';
Row::dumpTableHeader(*OS, /*Indent=*/Verbose ? 12 : 0);
}
// *OffsetPtr points to the end of the prologue - i.e. the start of the first
// sequence. So initialize the first sequence offset accordingly.
State.resetRowAndSequence(*OffsetPtr);

bool TombstonedAddress = false;
auto EmitRow = [&] {
if (!TombstonedAddress) {
Expand Down Expand Up @@ -912,7 +916,9 @@ Error DWARFDebugLine::LineTable::parse(
// into this code path - if it were invalid, the default case would be
// followed.
EmitRow();
State.resetRowAndSequence();
// Cursor now points just past the end_sequence opcode, i.e. at the start
// of the next sequence, if one exists.
State.resetRowAndSequence(Cursor.tell());
break;

case DW_LNE_set_address:
Expand Down Expand Up @@ -1364,23 +1370,25 @@ DWARFDebugLine::LineTable::lookupAddressImpl(object::SectionedAddress Address,

bool DWARFDebugLine::LineTable::lookupAddressRange(
object::SectionedAddress Address, uint64_t Size,
std::vector<uint32_t> &Result) const {
std::vector<uint32_t> &Result,
std::optional<uint64_t> StmtSequenceOffset) const {

// Search for relocatable addresses
if (lookupAddressRangeImpl(Address, Size, Result))
if (lookupAddressRangeImpl(Address, Size, Result, StmtSequenceOffset))
return true;

if (Address.SectionIndex == object::SectionedAddress::UndefSection)
return false;

// Search for absolute addresses
Address.SectionIndex = object::SectionedAddress::UndefSection;
return lookupAddressRangeImpl(Address, Size, Result);
return lookupAddressRangeImpl(Address, Size, Result, StmtSequenceOffset);
}

bool DWARFDebugLine::LineTable::lookupAddressRangeImpl(
object::SectionedAddress Address, uint64_t Size,
std::vector<uint32_t> &Result) const {
std::vector<uint32_t> &Result,
std::optional<uint64_t> StmtSequenceOffset) const {
if (Sequences.empty())
return false;
uint64_t EndAddr = Address.Address + Size;
Expand All @@ -1401,6 +1409,14 @@ bool DWARFDebugLine::LineTable::lookupAddressRangeImpl(

while (SeqPos != LastSeq && SeqPos->LowPC < EndAddr) {
Copy link
Collaborator

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Aside: this seems questionable. The LowPC of one sequence isn't necessarily related to the LowPC of another sequence (the linker could reorder them), so I'm not sure what this loop condition is meant to achieve, and I'd be a bit surprised if it reliably does it.

const DWARFDebugLine::Sequence &CurSeq = *SeqPos;

// Skip sequences that don't match our stmt_sequence offset if one was
// provided
if (StmtSequenceOffset && CurSeq.StmtSeqOffset != *StmtSequenceOffset) {
++SeqPos;
Copy link
Collaborator

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Might help if the while turned into a for loop (for (; SeqPos != LastSeq && SeqPos->LowPC < EndAddr; ++SeqPos)) so that the increment doesn't have to appear in two places/risk getting out of sync.

Though more generally - since the entries will be ordered by StmtSeqOffset - you could do a binary search (llvm::binary_search) to find the sequence, if the StmtSequenceOffset is specified.

Copy link
Contributor Author

@alx32 alx32 Feb 4, 2025

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Switched to binary search approach - though this makes the code a bit more complex I think.

continue;
}

// For the first sequence, we need to find which row in the sequence is the
// first in our range.
uint32_t FirstRowIndex = CurSeq.FirstRowIndex;
Expand All @@ -1423,7 +1439,7 @@ bool DWARFDebugLine::LineTable::lookupAddressRangeImpl(
++SeqPos;
}

return true;
return !Result.empty();
}

std::optional<StringRef>
Expand Down
137 changes: 136 additions & 1 deletion llvm/unittests/DebugInfo/DWARF/DWARFDebugLineTest.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -6,10 +6,10 @@
//
//===----------------------------------------------------------------------===//

#include "llvm/DebugInfo/DWARF/DWARFDebugLine.h"
#include "DwarfGenerator.h"
#include "DwarfUtils.h"
#include "llvm/DebugInfo/DWARF/DWARFContext.h"
#include "llvm/DebugInfo/DWARF/DWARFDebugLine.h"
#include "llvm/Object/ObjectFile.h"
#include "llvm/Testing/Support/Error.h"
#include "gtest/gtest.h"
Expand Down Expand Up @@ -2035,4 +2035,139 @@ TEST_F(DebugLineBasicFixture, PrintPathsProperly) {
EXPECT_THAT(Result.c_str(), MatchesRegex("a dir.b dir.b file"));
}

/// Test that lookupAddressRange correctly filters rows based on
/// DW_AT_LLVM_stmt_sequence.
///
/// This test verifies that:
/// 1. When a DIE has a DW_AT_LLVM_stmt_sequence attribute, lookupAddressRange
///    only returns rows from the sequence starting at the specified offset
/// 2. When a DIE has an invalid DW_AT_LLVM_stmt_sequence offset, no rows are
///    returned
/// 3. When no stmt_sequence offset is passed to lookupAddressRange, rows from
///    all matching sequences are returned
///
/// The test creates a line table with two sequences at the same address range
/// but different line numbers. It then creates three subprogram DIEs:
/// - One with DW_AT_LLVM_stmt_sequence pointing to the first sequence
/// - One with DW_AT_LLVM_stmt_sequence pointing to the second sequence
/// - One with an invalid DW_AT_LLVM_stmt_sequence offset
TEST_F(DebugLineBasicFixture, LookupAddressRangeWithStmtSequenceOffset) {
  if (!setupGenerator())
    GTEST_SKIP();

  // Create new DWARF with the subprogram DIEs
  dwarfgen::CompileUnit &CU = Gen->addCompileUnit();
  dwarfgen::DIE CUDie = CU.getUnitDIE();

  CUDie.addAttribute(DW_AT_name, DW_FORM_string, "/tmp/main.c");
  CUDie.addAttribute(DW_AT_language, DW_FORM_data2, DW_LANG_C);

  dwarfgen::DIE SD1 = CUDie.addChild(DW_TAG_subprogram);
  SD1.addAttribute(DW_AT_name, DW_FORM_string, "sub1");
  SD1.addAttribute(DW_AT_low_pc, DW_FORM_addr, 0x1000U);
  SD1.addAttribute(DW_AT_high_pc, DW_FORM_addr, 0x1032U);
  // DW_AT_LLVM_stmt_sequence points to the first sequence
  SD1.addAttribute(DW_AT_LLVM_stmt_sequence, DW_FORM_sec_offset, 0x2e);

  dwarfgen::DIE SD2 = CUDie.addChild(DW_TAG_subprogram);
  SD2.addAttribute(DW_AT_name, DW_FORM_string, "sub2");
  SD2.addAttribute(DW_AT_low_pc, DW_FORM_addr, 0x1000U);
  SD2.addAttribute(DW_AT_high_pc, DW_FORM_addr, 0x1032U);
  // DW_AT_LLVM_stmt_sequence points to the second sequence
  SD2.addAttribute(DW_AT_LLVM_stmt_sequence, DW_FORM_sec_offset, 0x42);

  dwarfgen::DIE SD3 = CUDie.addChild(DW_TAG_subprogram);
  SD3.addAttribute(DW_AT_name, DW_FORM_string, "sub3");
  SD3.addAttribute(DW_AT_low_pc, DW_FORM_addr, 0x1000U);
  SD3.addAttribute(DW_AT_high_pc, DW_FORM_addr, 0x1032U);
  // Invalid DW_AT_LLVM_stmt_sequence
  SD3.addAttribute(DW_AT_LLVM_stmt_sequence, DW_FORM_sec_offset, 0x66);

  // Create a line table with multiple sequences
  LineTable &LT = Gen->addLineTable();

  // First sequence with addresses 0x1000(Ln100), 0x1004(Ln101)
  LT.addExtendedOpcode(9, DW_LNE_set_address, {{0x1000U, LineTable::Quad}});
  LT.addStandardOpcode(DW_LNS_set_prologue_end, {});
  LT.addStandardOpcode(DW_LNS_advance_line, {{99, LineTable::SLEB}});
  LT.addStandardOpcode(DW_LNS_copy, {});
  LT.addByte(0x4b); // Special opcode: address += 4, line += 1
  LT.addExtendedOpcode(1, DW_LNE_end_sequence, {});

  // Second sequence with addresses 0x1000(Ln200), 0x1004(Ln201)
  LT.addExtendedOpcode(9, DW_LNE_set_address, {{0x1000U, LineTable::Quad}});
  LT.addStandardOpcode(DW_LNS_set_prologue_end, {});
  LT.addStandardOpcode(DW_LNS_advance_line, {{199, LineTable::SLEB}});
  LT.addStandardOpcode(DW_LNS_copy, {});
  LT.addByte(0x4b); // Special opcode: address += 4, line += 1
  LT.addExtendedOpcode(1, DW_LNE_end_sequence, {});

  // Generate the DWARF
  generate();

  // Parse the line table so that its sequences can be queried below
  auto ExpectedLineTable = Line.getOrParseLineTable(
      LineData, /*Offset=*/0, *Context, nullptr, RecordRecoverable);
  ASSERT_THAT_EXPECTED(ExpectedLineTable, Succeeded());
  const auto *Table = *ExpectedLineTable;

  uint32_t NumCUs = Context->getNumCompileUnits();
  ASSERT_EQ(NumCUs, 1u);
  DWARFUnit *Unit = Context->getUnitAtIndex(0);
  auto DwarfCUDie = Unit->getUnitDIE(false);

  auto Sub1Die = DwarfCUDie.getFirstChild();
  auto Sub2Die = Sub1Die.getSibling();
  auto Sub3Die = Sub2Die.getSibling();

  // Verify Sub1Die is the DIE generated from SD1. The attribute is checked
  // before it is dereferenced so a missing name fails the test cleanly instead
  // of dereferencing an empty optional.
  auto NameAttr1 = Sub1Die.find(DW_AT_name);
  ASSERT_TRUE(NameAttr1);
  EXPECT_STREQ(*dwarf::toString(*NameAttr1), "sub1");

  // Verify Sub2Die is the DIE generated from SD2
  auto NameAttr2 = Sub2Die.find(DW_AT_name);
  ASSERT_TRUE(NameAttr2);
  EXPECT_STREQ(*dwarf::toString(*NameAttr2), "sub2");

  // Verify Sub3Die is the DIE generated from SD3
  auto NameAttr3 = Sub3Die.find(DW_AT_name);
  ASSERT_TRUE(NameAttr3);
  EXPECT_STREQ(*dwarf::toString(*NameAttr3), "sub3");

  // Ensure there are two sequences
  ASSERT_EQ(Table->Sequences.size(), 2u);

  // Look up address 0x1000, which is covered by both sequences, using each
  // DIE's stmt_sequence offset as the filter, and finally with no filter.
  {
    std::vector<uint32_t> Rows;
    bool Found;

    // Look up using Sub3Die's invalid stmt_sequence offset
    auto StmtSeqAttr3 = Sub3Die.find(dwarf::DW_AT_LLVM_stmt_sequence);
    ASSERT_TRUE(StmtSeqAttr3);
    Found = Table->lookupAddressRange(
        {0x1000, object::SectionedAddress::UndefSection}, /*Size=*/1, Rows,
        toSectionOffset(StmtSeqAttr3));
    EXPECT_FALSE(Found);
    EXPECT_TRUE(Rows.empty());

    // Look up using Sub1Die's valid stmt_sequence offset. Rows is cleared
    // first so the size check below cannot be affected by an earlier lookup.
    Rows.clear();
    auto StmtSeqAttr1 = Sub1Die.find(dwarf::DW_AT_LLVM_stmt_sequence);
    ASSERT_TRUE(StmtSeqAttr1);
    Found = Table->lookupAddressRange(
        {0x1000, object::SectionedAddress::UndefSection}, /*Size=*/1, Rows,
        toSectionOffset(StmtSeqAttr1));
    EXPECT_TRUE(Found);
    ASSERT_EQ(Rows.size(), 1u);
    EXPECT_EQ(Rows[0], 0U);

    // Look up using Sub2Die's valid stmt_sequence offset
    Rows.clear();
    auto StmtSeqAttr2 = Sub2Die.find(dwarf::DW_AT_LLVM_stmt_sequence);
    ASSERT_TRUE(StmtSeqAttr2);
    Found = Table->lookupAddressRange(
        {0x1000, object::SectionedAddress::UndefSection}, /*Size=*/1, Rows,
        toSectionOffset(StmtSeqAttr2));
    EXPECT_TRUE(Found);
    ASSERT_EQ(Rows.size(), 1u);
    EXPECT_EQ(Rows[0], 3u);

    // Look up without any stmt_sequence filter: both sequences cover 0x1000,
    // so the first row of each sequence must be reported.
    Rows.clear();
    Found = Table->lookupAddressRange(
        {0x1000, object::SectionedAddress::UndefSection}, /*Size=*/1, Rows);
    EXPECT_TRUE(Found);
    ASSERT_EQ(Rows.size(), 2u);
    EXPECT_TRUE(llvm::is_contained(Rows, 0u));
    EXPECT_TRUE(llvm::is_contained(Rows, 3u));
  }
}
} // end anonymous namespace