From 2bf5eb64b4b320289939bbad204e079e6e2a199c Mon Sep 17 00:00:00 2001 From: Jeremy Morse Date: Mon, 7 Apr 2025 15:46:30 +0100 Subject: [PATCH 1/7] [MC][DebugInfo] Emit linetable entries with known offsets immediately DWARF linetable entries are usually emitted as a sequence of MCDwarfLineAddrFragment fragments containing the line-number difference and an MCExpr describing the instruction-range the linetable entry covers. These then get relaxed during assembly emission. However, a large number of these instruction-range expressions are ranges within a fixed MCDataFragment, i.e. a range over fixed-size instructions that are not subject to relaxation at a later stage. Thus, we can compute the address-delta immediately, and not spend time and memory describing that computation so it can be deferred. --- llvm/lib/MC/MCObjectStreamer.cpp | 16 ++++++++++++++++ 1 file changed, 16 insertions(+) diff --git a/llvm/lib/MC/MCObjectStreamer.cpp b/llvm/lib/MC/MCObjectStreamer.cpp index e9295f0e21dd3..bb920e4f72db2 100644 --- a/llvm/lib/MC/MCObjectStreamer.cpp +++ b/llvm/lib/MC/MCObjectStreamer.cpp @@ -464,6 +464,22 @@ void MCObjectStreamer::emitDwarfAdvanceLineAddr(int64_t LineDelta, Label, PointerSize); return; } + + // If the two labels are within the same fragment and it's a plain data + // fragment, then the address-offset is already a fixed constant and is not + // relaxable. Emit the advance-line-addr data immediately to save time and + // memory. + MCFragment *LastFrag = LastLabel->getFragment(); + if (LastFrag->getKind() == MCFragment::FT_Data && + Label->getFragment() == LastFrag) { + uint64_t AddrDelta = Label->getOffset() - LastLabel->getOffset(); + SmallString<16> Tmp; + MCDwarfLineAddr::encode(getContext(), Assembler->getDWARFLinetableParams(), + LineDelta, AddrDelta, Tmp); + emitBytes(Tmp); + return; + } + const MCExpr *AddrDelta = buildSymbolDiff(*this, Label, LastLabel, SMLoc()); insert(getContext().allocFragment(LineDelta, *AddrDelta)); From efd036813fb764025b7b8a75a6e31c257fd984ca Mon Sep 17 00:00:00 2001 From: Jeremy Morse Date: Mon, 7 Apr 2025 16:50:19 +0100 Subject: [PATCH 2/7] Disable this performance opt on RISCV See the comments on bbea64250f6548, the RISCV relocation model does something differently, and thus we need to emit linetables in a fully symbolic way. --- llvm/lib/MC/MCObjectStreamer.cpp | 4 +++- 1 file changed, 3 insertions(+), 1 deletion(-) diff --git a/llvm/lib/MC/MCObjectStreamer.cpp b/llvm/lib/MC/MCObjectStreamer.cpp index bb920e4f72db2..72c3b10e6e7a8 100644 --- a/llvm/lib/MC/MCObjectStreamer.cpp +++ b/llvm/lib/MC/MCObjectStreamer.cpp @@ -469,8 +469,10 @@ void MCObjectStreamer::emitDwarfAdvanceLineAddr(int64_t LineDelta, // fragment, then the address-offset is already a fixed constant and is not // relaxable. Emit the advance-line-addr data immediately to save time and // memory. + // As per commit bbea64250f6548, RISCV always desires symbolic relocations. MCFragment *LastFrag = LastLabel->getFragment(); - if (LastFrag->getKind() == MCFragment::FT_Data && + if (!getAssembler().getContext().getTargetTriple().isRISCV() && + LastFrag->getKind() == MCFragment::FT_Data && Label->getFragment() == LastFrag) { uint64_t AddrDelta = Label->getOffset() - LastLabel->getOffset(); SmallString<16> Tmp; From 174d5b76811fc332b34972b7551dfc632856a6ae Mon Sep 17 00:00:00 2001 From: Jeremy Morse Date: Wed, 7 May 2025 15:58:12 +0100 Subject: [PATCH 3/7] Use absoluteSymbolDiff, add target hook for whether to prefer using relocs Specifically: bbea64250f6 took away a hook for whether a target wanted symbol differences to be expressed in relocations or not, and here I've found a use case for it. --- llvm/include/llvm/MC/MCAsmBackend.h | 4 ++++ llvm/lib/MC/MCObjectStreamer.cpp | 19 ++++++++----------- .../RISCV/MCTargetDesc/RISCVAsmBackend.h | 14 ++++++++++++++ 3 files changed, 26 insertions(+), 11 deletions(-) diff --git a/llvm/include/llvm/MC/MCAsmBackend.h b/llvm/include/llvm/MC/MCAsmBackend.h index 691988ae1f1df..bdbd4fed38720 100644 --- a/llvm/include/llvm/MC/MCAsmBackend.h +++ b/llvm/include/llvm/MC/MCAsmBackend.h @@ -135,6 +135,10 @@ class MCAsmBackend { uint64_t Value, bool IsResolved, const MCSubtargetInfo *STI) const = 0; + /// Check whether the given target requires emitting differences of two + /// symbols as a set of relocations. + virtual bool requiresDiffExpressionRelocations() const { return false; } + /// @} /// \name Target Relaxation Interfaces diff --git a/llvm/lib/MC/MCObjectStreamer.cpp b/llvm/lib/MC/MCObjectStreamer.cpp index 72c3b10e6e7a8..df45f639c0581 100644 --- a/llvm/lib/MC/MCObjectStreamer.cpp +++ b/llvm/lib/MC/MCObjectStreamer.cpp @@ -469,17 +469,14 @@ void MCObjectStreamer::emitDwarfAdvanceLineAddr(int64_t LineDelta, // fragment, then the address-offset is already a fixed constant and is not // relaxable. Emit the advance-line-addr data immediately to save time and // memory. - // As per commit bbea64250f6548, RISCV always desires symbolic relocations. - MCFragment *LastFrag = LastLabel->getFragment(); - if (!getAssembler().getContext().getTargetTriple().isRISCV() && - LastFrag->getKind() == MCFragment::FT_Data && - Label->getFragment() == LastFrag) { - uint64_t AddrDelta = Label->getOffset() - LastLabel->getOffset(); - SmallString<16> Tmp; - MCDwarfLineAddr::encode(getContext(), Assembler->getDWARFLinetableParams(), - LineDelta, AddrDelta, Tmp); - emitBytes(Tmp); - return; + if (!Assembler->getBackend().requiresDiffExpressionRelocations()) { + if (auto OptAddrDelta = absoluteSymbolDiff(Label, LastLabel)) { + SmallString<16> Tmp; + MCDwarfLineAddr::encode(getContext(), Assembler->getDWARFLinetableParams(), + LineDelta, *OptAddrDelta, Tmp); + emitBytes(Tmp); + return; + } } const MCExpr *AddrDelta = buildSymbolDiff(*this, Label, LastLabel, SMLoc()); diff --git a/llvm/lib/Target/RISCV/MCTargetDesc/RISCVAsmBackend.h b/llvm/lib/Target/RISCV/MCTargetDesc/RISCVAsmBackend.h index 5db55ad0b8567..0ad5be40b4233 100644 --- a/llvm/lib/Target/RISCV/MCTargetDesc/RISCVAsmBackend.h +++ b/llvm/lib/Target/RISCV/MCTargetDesc/RISCVAsmBackend.h @@ -35,6 +35,20 @@ class RISCVAsmBackend : public MCAsmBackend { void setForceRelocs() { ForceRelocs = true; } + // Returns true if relocations will be forced for shouldForceRelocation by + // default. This will be true if relaxation is enabled or had previously + // been enabled. + bool willForceRelocations() const { + return ForceRelocs || STI.getFeatureBits()[RISCV::FeatureRelax]; + } + + // Generate diff expression relocations if the relax feature is enabled or had + // previously been enabled, otherwise it is safe for the assembler to + // calculate these internally. + bool requiresDiffExpressionRelocations() const override { + return willForceRelocations(); + } + // Return Size with extra Nop Bytes for alignment directive in code section. bool shouldInsertExtraNopBytesForCodeAlign(const MCAlignFragment &AF, unsigned &Size) override; From e39cb1264ac1443c52124ff7f036fa21ea5a78e7 Mon Sep 17 00:00:00 2001 From: Jeremy Morse Date: Wed, 7 May 2025 16:01:36 +0100 Subject: [PATCH 4/7] Test that simple .texts can have .debug_line emitted in one data fragment --- .../X86/debug-line-in-one-fragment.ll | 94 +++++++++++++++++++ 1 file changed, 94 insertions(+) create mode 100644 llvm/test/DebugInfo/X86/debug-line-in-one-fragment.ll diff --git a/llvm/test/DebugInfo/X86/debug-line-in-one-fragment.ll b/llvm/test/DebugInfo/X86/debug-line-in-one-fragment.ll new file mode 100644 index 0000000000000..e24e9787a05b3 --- /dev/null +++ b/llvm/test/DebugInfo/X86/debug-line-in-one-fragment.ll @@ -0,0 +1,94 @@ +; RUN: llc %s -o %t.o -filetype=obj +; RUN: llvm-dwarfdump --debug-line %t.o | FileCheck %s --check-prefix=LINES +; RUN: llc %s -o %t.o -filetype=obj -debug-only=mc-dump 2>&1 | FileCheck %s --check-prefix=FRAGMENTS + +;; Test (using mc-dump debug output) that .debug_line can be arranged in memory +;; using a single data fragment for a simple function, instead of using multiple +;; MCDwarfFragment fragments in un-necessary cirucmstances. Some targets want +;; multiple fragments so that they can linker-relax the linetable, but x86 +;; doesn't. +;; +;; First, sanity check that the linetable output is as expected, + +; LINES: Address Line Column File ISA Discriminator OpIndex Flags +; LINES-NEXT: ------------------ ------ ------ ------ --- ------------- ------- ------------- +; LINES-NEXT: 0x0000000000000000 3 5 0 0 0 0 is_stmt prologue_end +; LINES-NEXT: 0x0000000000000003 4 12 0 0 0 0 is_stmt +; LINES-NEXT: 0x0000000000000007 4 3 0 0 0 0 +; LINES-NEXT: 0x0000000000000008 4 3 0 0 0 0 end_sequence + +;; Here's a typical example of .debug_line in a suboptimal arrangement: for each +;; address-delta there's an MCDwarfFragment computing the delta during +;; relaxation. +;; +;; +;; AddrDelta:- LineDelta:1>, +;; +;; Contents:[05,03,06] (3 bytes)>, +;; +;; AddrDelta:- LineDelta:0>, +;; +;; AddrDelta:- LineDelta:9223372036854775807>, +;; +;; Contents:[] (0 bytes)>]>, +;; +;; The function in question is made of a single data fragment where the address +;; deltas are known at assembly time. We can (and should) emit .debug_line as a +;; single data fragment. (Check that we see one data fragment, then no more +;; fragments until the next section). +; +; FRAGMENTS: Date: Thu, 8 May 2025 10:45:01 +0100 Subject: [PATCH 5/7] Remove requiresDiffExpressionRelocations target hook Turns out this isn't needed, each fragment records whether it's relaxable --- llvm/include/llvm/MC/MCAsmBackend.h | 4 ---- llvm/lib/MC/MCObjectStreamer.cpp | 14 ++++++-------- .../Target/RISCV/MCTargetDesc/RISCVAsmBackend.h | 14 -------------- 3 files changed, 6 insertions(+), 26 deletions(-) diff --git a/llvm/include/llvm/MC/MCAsmBackend.h b/llvm/include/llvm/MC/MCAsmBackend.h index bdbd4fed38720..691988ae1f1df 100644 --- a/llvm/include/llvm/MC/MCAsmBackend.h +++ b/llvm/include/llvm/MC/MCAsmBackend.h @@ -135,10 +135,6 @@ class MCAsmBackend { uint64_t Value, bool IsResolved, const MCSubtargetInfo *STI) const = 0; - /// Check whether the given target requires emitting differences of two - /// symbols as a set of relocations. - virtual bool requiresDiffExpressionRelocations() const { return false; } - /// @} /// \name Target Relaxation Interfaces diff --git a/llvm/lib/MC/MCObjectStreamer.cpp b/llvm/lib/MC/MCObjectStreamer.cpp index df45f639c0581..a67dfa06dec4d 100644 --- a/llvm/lib/MC/MCObjectStreamer.cpp +++ b/llvm/lib/MC/MCObjectStreamer.cpp @@ -469,14 +469,12 @@ void MCObjectStreamer::emitDwarfAdvanceLineAddr(int64_t LineDelta, // fragment, then the address-offset is already a fixed constant and is not // relaxable. Emit the advance-line-addr data immediately to save time and // memory. - if (!Assembler->getBackend().requiresDiffExpressionRelocations()) { - if (auto OptAddrDelta = absoluteSymbolDiff(Label, LastLabel)) { - SmallString<16> Tmp; - MCDwarfLineAddr::encode(getContext(), Assembler->getDWARFLinetableParams(), - LineDelta, *OptAddrDelta, Tmp); - emitBytes(Tmp); - return; - } + if (auto OptAddrDelta = absoluteSymbolDiff(Label, LastLabel)) { + SmallString<16> Tmp; + MCDwarfLineAddr::encode(getContext(), Assembler->getDWARFLinetableParams(), + LineDelta, *OptAddrDelta, Tmp); + emitBytes(Tmp); + return; } const MCExpr *AddrDelta = buildSymbolDiff(*this, Label, LastLabel, SMLoc()); diff --git a/llvm/lib/Target/RISCV/MCTargetDesc/RISCVAsmBackend.h b/llvm/lib/Target/RISCV/MCTargetDesc/RISCVAsmBackend.h index 0ad5be40b4233..5db55ad0b8567 100644 --- a/llvm/lib/Target/RISCV/MCTargetDesc/RISCVAsmBackend.h +++ b/llvm/lib/Target/RISCV/MCTargetDesc/RISCVAsmBackend.h @@ -35,20 +35,6 @@ class RISCVAsmBackend : public MCAsmBackend { void setForceRelocs() { ForceRelocs = true; } - // Returns true if relocations will be forced for shouldForceRelocation by - // default. This will be true if relaxation is enabled or had previously - // been enabled. - bool willForceRelocations() const { - return ForceRelocs || STI.getFeatureBits()[RISCV::FeatureRelax]; - } - - // Generate diff expression relocations if the relax feature is enabled or had - // previously been enabled, otherwise it is safe for the assembler to - // calculate these internally. - bool requiresDiffExpressionRelocations() const override { - return willForceRelocations(); - } - // Return Size with extra Nop Bytes for alignment directive in code section. bool shouldInsertExtraNopBytesForCodeAlign(const MCAlignFragment &AF, unsigned &Size) override; From ba944d4e9cc6de66d03aab05eb73d3e70674da2c Mon Sep 17 00:00:00 2001 From: Jeremy Morse Date: Thu, 8 May 2025 10:55:42 +0100 Subject: [PATCH 6/7] Update RISVC test for new expected output --- llvm/test/DebugInfo/RISCV/dwarf-riscv-relocs.ll | 10 ++++------ 1 file changed, 4 insertions(+), 6 deletions(-) diff --git a/llvm/test/DebugInfo/RISCV/dwarf-riscv-relocs.ll b/llvm/test/DebugInfo/RISCV/dwarf-riscv-relocs.ll index 548cf2daa9d8d..d609a3fa889c0 100644 --- a/llvm/test/DebugInfo/RISCV/dwarf-riscv-relocs.ll +++ b/llvm/test/DebugInfo/RISCV/dwarf-riscv-relocs.ll @@ -38,7 +38,7 @@ ; LINE-DUMP: .debug_line contents: ; LINE-DUMP-NEXT: debug_line[0x00000000] ; LINE-DUMP-NEXT: Line table prologue: -; LINE-DUMP-NEXT: total_length: 0x00000067 +; LINE-DUMP-NEXT: total_length: 0x00000061 ; LINE-DUMP-NEXT: format: DWARF32 ; LINE-DUMP-NEXT: version: 5 ; LINE-DUMP-NEXT: address_size: 4 @@ -83,12 +83,10 @@ ; LINE-DUMP-NEXT: 0x000000000000001c 3 5 0 0 0 0 is_stmt prologue_end ; LINE-DUMP-NEXT:0x0000005d: 06 DW_LNS_negate_stmt ; LINE-DUMP-NEXT:0x0000005e: 0b DW_LNS_set_epilogue_begin -; LINE-DUMP-NEXT:0x0000005f: 03 DW_LNS_advance_line (3) -; LINE-DUMP-NEXT:0x00000061: 09 DW_LNS_fixed_advance_pc (addr += 0x0004, op-index = 0) -; LINE-DUMP-NEXT:0x00000064: 01 DW_LNS_copy +; LINE-DUMP-NEXT:0x0000005f: 4a address += 4, line += 0, op-index += 0 ; LINE-DUMP-NEXT: 0x0000000000000020 3 5 0 0 0 0 epilogue_begin -; LINE-DUMP-NEXT:0x00000065: 09 DW_LNS_fixed_advance_pc (addr += 0x0010, op-index = 0) -; LINE-DUMP-NEXT:0x00000068: 00 DW_LNE_end_sequence +; LINE-DUMP-NEXT:0x00000060: 02 DW_LNS_advance_pc (addr += 16, op-index += 0) +; LINE-DUMP-NEXT:0x00000062: 00 DW_LNE_end_sequence ; LINE-DUMP-NEXT: 0x0000000000000030 3 5 0 0 0 0 end_sequence ; ModuleID = 'dwarf-riscv-relocs.c' From c36881d7a321f4c3715fed47b32ac7908d94e2b2 Mon Sep 17 00:00:00 2001 From: Jeremy Morse Date: Mon, 12 May 2025 15:04:35 +0100 Subject: [PATCH 7/7] Review feedback (improved comments, mandatory REQUIRES) --- llvm/lib/MC/MCObjectStreamer.cpp | 7 +++---- llvm/test/DebugInfo/X86/debug-line-in-one-fragment.ll | 2 ++ 2 files changed, 5 insertions(+), 4 deletions(-) diff --git a/llvm/lib/MC/MCObjectStreamer.cpp b/llvm/lib/MC/MCObjectStreamer.cpp index a67dfa06dec4d..7036c408c2e32 100644 --- a/llvm/lib/MC/MCObjectStreamer.cpp +++ b/llvm/lib/MC/MCObjectStreamer.cpp @@ -465,10 +465,9 @@ void MCObjectStreamer::emitDwarfAdvanceLineAddr(int64_t LineDelta, return; } - // If the two labels are within the same fragment and it's a plain data - // fragment, then the address-offset is already a fixed constant and is not - // relaxable. Emit the advance-line-addr data immediately to save time and - // memory. + // If the two labels are within the same fragment, then the address-offset is + // already a fixed constant and is not relaxable. Emit the advance-line-addr + // data immediately to save time and memory. if (auto OptAddrDelta = absoluteSymbolDiff(Label, LastLabel)) { SmallString<16> Tmp; MCDwarfLineAddr::encode(getContext(), Assembler->getDWARFLinetableParams(), diff --git a/llvm/test/DebugInfo/X86/debug-line-in-one-fragment.ll b/llvm/test/DebugInfo/X86/debug-line-in-one-fragment.ll index e24e9787a05b3..16c9e12accbc0 100644 --- a/llvm/test/DebugInfo/X86/debug-line-in-one-fragment.ll +++ b/llvm/test/DebugInfo/X86/debug-line-in-one-fragment.ll @@ -1,6 +1,8 @@ ; RUN: llc %s -o %t.o -filetype=obj ; RUN: llvm-dwarfdump --debug-line %t.o | FileCheck %s --check-prefix=LINES ; RUN: llc %s -o %t.o -filetype=obj -debug-only=mc-dump 2>&1 | FileCheck %s --check-prefix=FRAGMENTS +; +; REQUIRES: asserts ;; Test (using mc-dump debug output) that .debug_line can be arranged in memory ;; using a single data fragment for a simple function, instead of using multiple