From 03385da68038c3fd40fc085c8fb1914529116837 Mon Sep 17 00:00:00 2001 From: Pavel Labath Date: Tue, 12 Nov 2024 11:30:39 +0100 Subject: [PATCH 1/2] [lldb] Expose discontinuous functions through SBFunction::GetRanges SBFunction::GetEndAddress doesn't really make sense for discontinuous functions, so I'm declaring it deprecated. GetStartAddress sort of makes sense, if one uses it to find the functions entry point, so I'm keeping that undeprecated. I've made the test a Shell tests because these make it easier to create discontinuous functions regardless of the host os and architecture. They do make testing the python API harder, but I think I've managed to come up with something not entirely unreasonable. --- lldb/include/lldb/API/SBAddressRangeList.h | 1 + lldb/include/lldb/API/SBFunction.h | 2 + lldb/include/lldb/Core/AddressRangeListImpl.h | 5 +- lldb/include/lldb/Symbol/Function.h | 3 + lldb/source/API/SBFunction.cpp | 17 +- lldb/source/Core/AddressRangeListImpl.cpp | 8 - .../Python/sb_function_ranges.s | 182 ++++++++++++++++++ 7 files changed, 198 insertions(+), 20 deletions(-) create mode 100644 lldb/test/Shell/ScriptInterpreter/Python/sb_function_ranges.s diff --git a/lldb/include/lldb/API/SBAddressRangeList.h b/lldb/include/lldb/API/SBAddressRangeList.h index 5a4eeecf37dc9..41085b1edf8d7 100644 --- a/lldb/include/lldb/API/SBAddressRangeList.h +++ b/lldb/include/lldb/API/SBAddressRangeList.h @@ -45,6 +45,7 @@ class LLDB_API SBAddressRangeList { private: friend class SBBlock; friend class SBProcess; + friend class SBFunction; lldb_private::AddressRangeListImpl &ref() const; diff --git a/lldb/include/lldb/API/SBFunction.h b/lldb/include/lldb/API/SBFunction.h index df607fdc7ebf5..0a8aeeff1ea5a 100644 --- a/lldb/include/lldb/API/SBFunction.h +++ b/lldb/include/lldb/API/SBFunction.h @@ -43,6 +43,8 @@ class LLDB_API SBFunction { lldb::SBAddress GetStartAddress(); + LLDB_DEPRECATED_FIXME("Not compatible with discontinuous functions.", + "GetRanges()") lldb::SBAddress GetEndAddress(); lldb::SBAddressRangeList GetRanges(); diff --git a/lldb/include/lldb/Core/AddressRangeListImpl.h b/lldb/include/lldb/Core/AddressRangeListImpl.h index 6742e6ead87de..6b88f9b1ac179 100644 --- a/lldb/include/lldb/Core/AddressRangeListImpl.h +++ b/lldb/include/lldb/Core/AddressRangeListImpl.h @@ -24,9 +24,8 @@ class AddressRangeListImpl { public: AddressRangeListImpl(); - AddressRangeListImpl(const AddressRangeListImpl &rhs) = default; - - AddressRangeListImpl &operator=(const AddressRangeListImpl &rhs); + explicit AddressRangeListImpl(AddressRanges ranges) + : m_ranges(std::move(ranges)) {} size_t GetSize() const; diff --git a/lldb/include/lldb/Symbol/Function.h b/lldb/include/lldb/Symbol/Function.h index 70f51a846f8d9..11921398ac765 100644 --- a/lldb/include/lldb/Symbol/Function.h +++ b/lldb/include/lldb/Symbol/Function.h @@ -444,8 +444,11 @@ class Function : public UserID, public SymbolContextScope { Function *CalculateSymbolContextFunction() override; + // DEPRECATED: Use GetAddressRanges instead. const AddressRange &GetAddressRange() { return m_range; } + const AddressRanges &GetAddressRanges() const { return m_ranges; } + lldb::LanguageType GetLanguage() const; /// Find the file and line number of the source location of the start of the /// function. This will use the declaration if present and fall back on the diff --git a/lldb/source/API/SBFunction.cpp b/lldb/source/API/SBFunction.cpp index ac61220ec8736..2ef62eea4d199 100644 --- a/lldb/source/API/SBFunction.cpp +++ b/lldb/source/API/SBFunction.cpp @@ -10,6 +10,7 @@ #include "lldb/API/SBAddressRange.h" #include "lldb/API/SBProcess.h" #include "lldb/API/SBStream.h" +#include "lldb/Core/AddressRangeListImpl.h" #include "lldb/Core/Disassembler.h" #include "lldb/Core/Module.h" #include "lldb/Symbol/CompileUnit.h" @@ -153,10 +154,11 @@ SBAddress SBFunction::GetEndAddress() { SBAddress addr; if (m_opaque_ptr) { - addr_t byte_size = m_opaque_ptr->GetAddressRange().GetByteSize(); - if (byte_size > 0) { - addr.SetAddress(m_opaque_ptr->GetAddressRange().GetBaseAddress()); - addr->Slide(byte_size); + llvm::ArrayRef ranges = m_opaque_ptr->GetAddressRanges(); + if (!ranges.empty()) { + // Return the end of the first range, use GetRanges to get all ranges. + addr.SetAddress(ranges.front().GetBaseAddress()); + addr->Slide(ranges.front().GetByteSize()); } } return addr; @@ -166,11 +168,8 @@ lldb::SBAddressRangeList SBFunction::GetRanges() { LLDB_INSTRUMENT_VA(this); lldb::SBAddressRangeList ranges; - if (m_opaque_ptr) { - lldb::SBAddressRange range; - (*range.m_opaque_up) = m_opaque_ptr->GetAddressRange(); - ranges.Append(std::move(range)); - } + if (m_opaque_ptr) + ranges.ref() = AddressRangeListImpl(m_opaque_ptr->GetAddressRanges()); return ranges; } diff --git a/lldb/source/Core/AddressRangeListImpl.cpp b/lldb/source/Core/AddressRangeListImpl.cpp index d405cf0fa3ec3..257824a0551e1 100644 --- a/lldb/source/Core/AddressRangeListImpl.cpp +++ b/lldb/source/Core/AddressRangeListImpl.cpp @@ -13,14 +13,6 @@ using namespace lldb_private; AddressRangeListImpl::AddressRangeListImpl() : m_ranges() {} -AddressRangeListImpl & -AddressRangeListImpl::operator=(const AddressRangeListImpl &rhs) { - if (this == &rhs) - return *this; - m_ranges = rhs.m_ranges; - return *this; -} - size_t AddressRangeListImpl::GetSize() const { return m_ranges.size(); } void AddressRangeListImpl::Reserve(size_t capacity) { diff --git a/lldb/test/Shell/ScriptInterpreter/Python/sb_function_ranges.s b/lldb/test/Shell/ScriptInterpreter/Python/sb_function_ranges.s new file mode 100644 index 0000000000000..09b41148c7068 --- /dev/null +++ b/lldb/test/Shell/ScriptInterpreter/Python/sb_function_ranges.s @@ -0,0 +1,182 @@ +# REQUIRES: x86 + +# RUN: split-file %s %t +# RUN: llvm-mc -triple x86_64-pc-linux -filetype=obj %t/input.s -o %t/input.o +# RUN: %lldb %t/input.o -o "command script import %t/script.py" -o exit | FileCheck %s + +# CHECK: Found 1 function(s). +# CHECK: foo: [input.o[0x0-0x7), input.o[0x7-0xe), input.o[0x14-0x1b), input.o[0x1b-0x1c)] + +#--- script.py +import lldb + +def __lldb_init_module(debugger, internal_dict): + target = debugger.GetSelectedTarget() + sym_ctxs = target.FindFunctions("foo") + print(f"Found {len(sym_ctxs)} function(s).") + for ctx in sym_ctxs: + fn = ctx.function + print(f"{fn.name}: {fn.GetRanges()}") + +#--- input.s +# An example of a function which has been split into two parts. Roughly +# corresponds to this C code. +# int baz(); +# int bar() { return 47; } +# int foo(int flag) { return flag ? bar() : baz(); } +# The function bar has been placed "in the middle" of foo. + + .text + + .type foo,@function +foo: + .cfi_startproc + cmpl $0, %edi + je foo.__part.2 + jmp foo.__part.1 + .cfi_endproc +.Lfoo_end: + .size foo, .Lfoo_end-foo + +foo.__part.1: + .cfi_startproc + callq bar + jmp foo.__part.3 +.Lfoo.__part.1_end: + .size foo.__part.1, .Lfoo.__part.1_end-foo.__part.1 + .cfi_endproc + +bar: + .cfi_startproc + movl $47, %eax + retq + .cfi_endproc +.Lbar_end: + .size bar, .Lbar_end-bar + +foo.__part.2: + .cfi_startproc + callq baz + jmp foo.__part.3 +.Lfoo.__part.2_end: + .size foo.__part.2, .Lfoo.__part.2_end-foo.__part.2 + .cfi_endproc + +foo.__part.3: + .cfi_startproc + retq +.Lfoo.__part.3_end: + .size foo.__part.3, .Lfoo.__part.3_end-foo.__part.3 + .cfi_endproc + + + .section .debug_abbrev,"",@progbits + .byte 1 # Abbreviation Code + .byte 17 # DW_TAG_compile_unit + .byte 1 # DW_CHILDREN_yes + .byte 37 # DW_AT_producer + .byte 8 # DW_FORM_string + .byte 19 # DW_AT_language + .byte 5 # DW_FORM_data2 + .byte 17 # DW_AT_low_pc + .byte 1 # DW_FORM_addr + .byte 85 # DW_AT_ranges + .byte 35 # DW_FORM_rnglistx + .byte 116 # DW_AT_rnglists_base + .byte 23 # DW_FORM_sec_offset + .byte 0 # EOM(1) + .byte 0 # EOM(2) + .byte 2 # Abbreviation Code + .byte 46 # DW_TAG_subprogram + .byte 0 # DW_CHILDREN_no + .byte 17 # DW_AT_low_pc + .byte 1 # DW_FORM_addr + .byte 18 # DW_AT_high_pc + .byte 1 # DW_FORM_addr + .byte 3 # DW_AT_name + .byte 8 # DW_FORM_string + .byte 0 # EOM(1) + .byte 0 # EOM(2) + .byte 3 # Abbreviation Code + .byte 46 # DW_TAG_subprogram + .byte 0 # DW_CHILDREN_no + .byte 85 # DW_AT_ranges + .byte 35 # DW_FORM_rnglistx + .byte 64 # DW_AT_frame_base + .byte 24 # DW_FORM_exprloc + .byte 3 # DW_AT_name + .byte 8 # DW_FORM_string + .byte 0 # EOM(1) + .byte 0 # EOM(2) + .byte 0 # EOM(3) + + .section .debug_info,"",@progbits +.Lcu_begin0: + .long .Ldebug_info_end0-.Ldebug_info_start0 # Length of Unit +.Ldebug_info_start0: + .short 5 # DWARF version number + .byte 1 # DWARF Unit Type + .byte 8 # Address Size (in bytes) + .long .debug_abbrev # Offset Into Abbrev. Section + .byte 1 # Abbrev [1] DW_TAG_compile_unit + .asciz "Hand-written DWARF" # DW_AT_producer + .short 29 # DW_AT_language + .quad 0 # DW_AT_low_pc + .byte 1 # DW_AT_ranges + .long .Lrnglists_table_base0 # DW_AT_rnglists_base + .byte 2 # Abbrev [2] DW_TAG_subprogram + .quad bar # DW_AT_low_pc + .quad .Lbar_end # DW_AT_high_pc + .asciz "bar" # DW_AT_name + .byte 3 # Abbrev [3] DW_TAG_subprogram + .byte 0 # DW_AT_ranges + .byte 1 # DW_AT_frame_base + .byte 86 + .asciz "foo" # DW_AT_name + .byte 0 # End Of Children Mark +.Ldebug_info_end0: + + .section .debug_rnglists,"",@progbits + .long .Ldebug_list_header_end0-.Ldebug_list_header_start0 # Length +.Ldebug_list_header_start0: + .short 5 # Version + .byte 8 # Address size + .byte 0 # Segment selector size + .long 2 # Offset entry count +.Lrnglists_table_base0: + .long .Ldebug_ranges0-.Lrnglists_table_base0 + .long .Ldebug_ranges1-.Lrnglists_table_base0 +.Ldebug_ranges0: + .byte 6 # DW_RLE_start_end + .quad foo + .quad .Lfoo_end + .byte 6 # DW_RLE_start_end + .quad foo.__part.1 + .quad .Lfoo.__part.1_end + .byte 6 # DW_RLE_start_end + .quad foo.__part.2 + .quad .Lfoo.__part.2_end + .byte 6 # DW_RLE_start_end + .quad foo.__part.3 + .quad .Lfoo.__part.3_end + .byte 0 # DW_RLE_end_of_list +.Ldebug_ranges1: + .byte 6 # DW_RLE_start_end + .quad bar + .quad .Lbar_end + .byte 6 # DW_RLE_start_end + .quad foo.__part.1 + .quad .Lfoo.__part.1_end + .byte 6 # DW_RLE_start_end + .quad foo.__part.2 + .quad .Lfoo.__part.2_end + .byte 6 # DW_RLE_start_end + .quad foo.__part.3 + .quad .Lfoo.__part.3_end + .byte 6 # DW_RLE_start_end + .quad foo + .quad .Lfoo_end + .byte 0 # DW_RLE_end_of_list +.Ldebug_list_header_end0: + + .section ".note.GNU-stack","",@progbits From 691f0e059c1f1d51e09e1fdb718224827a8b33b7 Mon Sep 17 00:00:00 2001 From: Pavel Labath Date: Tue, 3 Dec 2024 09:53:38 +0100 Subject: [PATCH 2/2] doxygenize --- lldb/include/lldb/Symbol/Function.h | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/lldb/include/lldb/Symbol/Function.h b/lldb/include/lldb/Symbol/Function.h index 11921398ac765..855940a6415d7 100644 --- a/lldb/include/lldb/Symbol/Function.h +++ b/lldb/include/lldb/Symbol/Function.h @@ -444,7 +444,7 @@ class Function : public UserID, public SymbolContextScope { Function *CalculateSymbolContextFunction() override; - // DEPRECATED: Use GetAddressRanges instead. + /// DEPRECATED: Use GetAddressRanges instead. const AddressRange &GetAddressRange() { return m_range; } const AddressRanges &GetAddressRanges() const { return m_ranges; }