Skip to content

Commit e74487f

Browse files
committed
simplify disassembly logics significantly
1 parent b9465b5 commit e74487f

File tree

4 files changed

+159
-156
lines changed

4 files changed

+159
-156
lines changed

lldb/source/API/SBTarget.cpp

Lines changed: 4 additions & 4 deletions
Original file line numberDiff line numberDiff line change
@@ -1672,19 +1672,19 @@ uint32_t SBTarget::GetMinimumOpcodeByteSize() const {
16721672
LLDB_INSTRUMENT_VA(this);
16731673

16741674
TargetSP target_sp(GetSP());
1675-
if (target_sp) {
1675+
if (target_sp)
16761676
return target_sp->GetArchitecture().GetMinimumOpcodeByteSize();
1677-
}
1677+
16781678
return 0;
16791679
}
16801680

16811681
uint32_t SBTarget::GetMaximumOpcodeByteSize() const {
16821682
LLDB_INSTRUMENT_VA(this);
16831683

16841684
TargetSP target_sp(GetSP());
1685-
if (target_sp) {
1685+
if (target_sp)
16861686
return target_sp->GetArchitecture().GetMaximumOpcodeByteSize();
1687-
}
1687+
16881688
return 0;
16891689
}
16901690

lldb/test/API/tools/lldb-dap/disassemble/TestDAP_disassemble.py

Lines changed: 9 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -46,10 +46,18 @@ def test_disassemble_backwards(self):
4646
instruction_pointer_reference = self.get_stackFrames()[1][
4747
"instructionPointerReference"
4848
]
49-
backwards_instructions = 50
49+
backwards_instructions = 200
50+
instructions_count = 400
5051
instructions = self.dap_server.request_disassemble(
5152
memoryReference=instruction_pointer_reference,
5253
instructionOffset=-backwards_instructions,
54+
instructionCount=instructions_count,
55+
)
56+
57+
self.assertEqual(
58+
len(instructions),
59+
instructions_count,
60+
"Disassemble request should return the exact requested number of instructions.",
5361
)
5462

5563
frame_instruction_index = next(

lldb/tools/lldb-dap/Handler/DisassembleRequestHandler.cpp

Lines changed: 146 additions & 142 deletions
Original file line numberDiff line numberDiff line change
@@ -12,164 +12,85 @@
1212
#include "Protocol/ProtocolRequests.h"
1313
#include "Protocol/ProtocolTypes.h"
1414
#include "RequestHandler.h"
15+
#include "lldb/API/SBAddress.h"
1516
#include "lldb/API/SBInstruction.h"
17+
#include "lldb/API/SBTarget.h"
1618
#include "lldb/lldb-types.h"
1719
#include "llvm/ADT/StringExtras.h"
20+
#include "llvm/Support/Error.h"
21+
#include <cstdint>
1822
#include <optional>
1923

2024
using namespace lldb_dap::protocol;
2125

2226
namespace lldb_dap {
2327

24-
/// Disassembles code stored at the provided location.
25-
/// Clients should only call this request if the corresponding capability
26-
/// `supportsDisassembleRequest` is true.
27-
llvm::Expected<DisassembleResponseBody>
28-
DisassembleRequestHandler::Run(const DisassembleArguments &args) const {
29-
std::optional<lldb::addr_t> addr_opt =
30-
DecodeMemoryReference(args.memoryReference);
31-
if (!addr_opt.has_value())
32-
return llvm::make_error<DAPError>("Malformed memory reference: " +
33-
args.memoryReference);
34-
35-
lldb::addr_t addr_ptr = *addr_opt;
36-
addr_ptr += args.offset.value_or(0);
37-
lldb::SBAddress addr(addr_ptr, dap.target);
38-
if (!addr.IsValid())
39-
return llvm::make_error<DAPError>(
40-
"Memory reference not found in the current binary.");
41-
42-
std::string flavor_string;
43-
const auto target_triple = llvm::StringRef(dap.target.GetTriple());
44-
// This handles both 32 and 64bit x86 architecture. The logic is duplicated in
45-
// `CommandObjectDisassemble::CommandOptions::OptionParsingStarting`
46-
if (target_triple.starts_with("x86")) {
47-
const lldb::SBStructuredData flavor =
48-
dap.debugger.GetSetting("target.x86-disassembly-flavor");
49-
50-
const size_t str_length = flavor.GetStringValue(nullptr, 0);
51-
if (str_length != 0) {
52-
flavor_string.resize(str_length + 1);
53-
flavor.GetStringValue(flavor_string.data(), flavor_string.length());
54-
}
55-
}
56-
57-
int64_t instructionOffset = args.instructionOffset.value_or(0);
58-
if (instructionOffset > 0) {
59-
lldb::SBInstructionList forward_insts = dap.target.ReadInstructions(
60-
addr, instructionOffset + 1, flavor_string.c_str());
61-
if (forward_insts.GetSize() != static_cast<size_t>(instructionOffset + 1)) {
62-
return llvm::make_error<DAPError>(
63-
"Failed to disassemble instructions after " +
64-
std::to_string(instructionOffset) +
65-
" instructions from the given address.");
66-
}
67-
68-
addr = forward_insts.GetInstructionAtIndex(instructionOffset).GetAddress();
69-
}
70-
71-
const bool resolve_symbols = args.resolveSymbols.value_or(false);
72-
std::vector<DisassembledInstruction> instructions;
73-
if (instructionOffset < 0)
74-
instructions = disassembleBackwards(addr, std::abs(instructionOffset),
75-
flavor_string.c_str(), resolve_symbols);
76-
77-
const auto instructions_left = args.instructionCount - instructions.size();
78-
lldb::SBInstructionList insts = dap.target.ReadInstructions(
79-
addr, instructions_left, flavor_string.c_str());
80-
81-
if (!insts.IsValid())
82-
return llvm::make_error<DAPError>(
83-
"Failed to find instructions for memory address.");
84-
85-
// add the disassembly from the given address forward
86-
const auto num_insts = insts.GetSize();
87-
for (size_t i = 0;
88-
i < num_insts && instructions.size() < args.instructionCount; ++i) {
89-
lldb::SBInstruction inst = insts.GetInstructionAtIndex(i);
90-
instructions.push_back(
91-
SBInstructionToDisassembledInstruction(inst, resolve_symbols));
92-
}
93-
94-
// Pad the instructions with invalid instructions if needed.
95-
if (instructions.size() < args.instructionCount)
96-
for (size_t i = instructions.size(); i < args.instructionCount; ++i)
97-
instructions.push_back(GetInvalidInstruction());
98-
99-
return DisassembleResponseBody{std::move(instructions)};
28+
static protocol::DisassembledInstruction GetInvalidInstruction() {
29+
DisassembledInstruction invalid_inst;
30+
invalid_inst.presentationHint =
31+
DisassembledInstruction::eDisassembledInstructionPresentationHintInvalid;
32+
return invalid_inst;
10033
}
10134

102-
std::vector<protocol::DisassembledInstruction>
103-
DisassembleRequestHandler::disassembleBackwards(
104-
lldb::SBAddress &addr, const uint32_t instruction_count,
105-
const char *flavor_string, bool resolve_symbols) const {
106-
std::vector<DisassembledInstruction> instructions;
35+
static lldb::SBAddress GetDisassembleStartAddress(lldb::SBTarget target,
36+
lldb::SBAddress addr,
37+
int64_t instruction_offset) {
38+
if (instruction_offset == 0)
39+
return addr;
40+
41+
if (target.GetMinimumOpcodeByteSize() == target.GetMaximumOpcodeByteSize()) {
42+
// We have fixed opcode size, so we can calculate the address directly,
43+
// negative or positive.
44+
lldb::addr_t load_addr = addr.GetLoadAddress(target);
45+
load_addr += instruction_offset * target.GetMinimumOpcodeByteSize();
46+
return lldb::SBAddress(load_addr, target);
47+
}
10748

108-
if (dap.target.GetMinimumOpcodeByteSize() ==
109-
dap.target.GetMaximumOpcodeByteSize()) {
110-
// If the target has a fixed opcode size, we can disassemble backwards
111-
// directly.
112-
lldb::addr_t disassemble_start_load_addr =
113-
addr.GetLoadAddress(dap.target) -
114-
(instruction_count * dap.target.GetMinimumOpcodeByteSize());
115-
lldb::SBAddress disassemble_start_addr(disassemble_start_load_addr,
116-
dap.target);
117-
lldb::SBInstructionList backwards_insts =
118-
dap.target.ReadInstructions(addr, instruction_count, flavor_string);
119-
if (backwards_insts.IsValid()) {
120-
for (size_t i = 0; i < backwards_insts.GetSize(); ++i) {
121-
lldb::SBInstruction inst = backwards_insts.GetInstructionAtIndex(i);
122-
instructions.push_back(
123-
SBInstructionToDisassembledInstruction(inst, resolve_symbols));
124-
}
125-
return instructions;
126-
}
127-
} else {
128-
// There is no opcode fixed size so we have no idea where are the valid
129-
// instructions before the current address. let's try from the start of the
130-
// symbol if available.
131-
auto symbol = addr.GetSymbol();
132-
if (symbol.IsValid()) {
133-
// add valid instructions before the current instruction using the symbol.
134-
lldb::SBInstructionList symbol_insts = dap.target.ReadInstructions(
135-
symbol.GetStartAddress(), addr, flavor_string);
136-
if (symbol_insts.IsValid()) {
137-
size_t backwards_insts_start =
138-
symbol_insts.GetSize() >= instruction_count
139-
? symbol_insts.GetSize() - instruction_count
140-
: 0;
141-
for (size_t i = backwards_insts_start;
142-
i < symbol_insts.GetSize() &&
143-
instructions.size() < instruction_count;
144-
++i) {
145-
lldb::SBInstruction inst = symbol_insts.GetInstructionAtIndex(i);
146-
instructions.push_back(
147-
SBInstructionToDisassembledInstruction(inst, resolve_symbols));
148-
}
149-
}
150-
}
49+
if (instruction_offset > 0) {
50+
lldb::SBInstructionList forward_insts =
51+
target.ReadInstructions(addr, instruction_offset + 1);
52+
return forward_insts.GetInstructionAtIndex(forward_insts.GetSize() - 1)
53+
.GetAddress();
15154
}
15255

153-
// pad the instructions with invalid instructions if needed.
154-
while (instructions.size() < instruction_count) {
155-
instructions.insert(instructions.begin(), GetInvalidInstruction());
56+
// We have a negative instruction offset, so we need to disassemble backwards.
57+
// The opcode size is not fixed, so we have no idea where to start from.
58+
// Let's try from the start of the current symbol if available.
59+
auto symbol = addr.GetSymbol();
60+
if (!symbol.IsValid())
61+
return addr;
62+
63+
// Add valid instructions before the current instruction using the symbol.
64+
lldb::SBInstructionList symbol_insts =
65+
target.ReadInstructions(symbol.GetStartAddress(), addr, nullptr);
66+
if (!symbol_insts.IsValid() || symbol_insts.GetSize() == 0)
67+
return addr;
68+
69+
const auto backwards_instructions_count =
70+
static_cast<size_t>(std::abs(instruction_offset));
71+
if (symbol_insts.GetSize() < backwards_instructions_count) {
72+
// We don't have enough instructions to disassemble backwards, so just
73+
// return the start address of the symbol.
74+
return symbol_insts.GetInstructionAtIndex(0).GetAddress();
15675
}
15776

158-
return instructions;
77+
return symbol_insts
78+
.GetInstructionAtIndex(symbol_insts.GetSize() -
79+
backwards_instructions_count)
80+
.GetAddress();
15981
}
16082

161-
DisassembledInstruction
162-
DisassembleRequestHandler::SBInstructionToDisassembledInstruction(
163-
lldb::SBInstruction &inst, bool resolve_symbols) const {
83+
static DisassembledInstruction ConvertSBInstructionToDisassembledInstruction(
84+
lldb::SBTarget &target, lldb::SBInstruction &inst, bool resolve_symbols) {
16485
if (!inst.IsValid())
16586
return GetInvalidInstruction();
16687

16788
auto addr = inst.GetAddress();
168-
const auto inst_addr = addr.GetLoadAddress(dap.target);
169-
const char *m = inst.GetMnemonic(dap.target);
170-
const char *o = inst.GetOperands(dap.target);
171-
const char *c = inst.GetComment(dap.target);
172-
auto d = inst.GetData(dap.target);
89+
const auto inst_addr = addr.GetLoadAddress(target);
90+
const char *m = inst.GetMnemonic(target);
91+
const char *o = inst.GetOperands(target);
92+
const char *c = inst.GetComment(target);
93+
auto d = inst.GetData(target);
17394

17495
std::string bytes;
17596
llvm::raw_string_ostream sb(bytes);
@@ -246,12 +167,95 @@ DisassembleRequestHandler::SBInstructionToDisassembledInstruction(
246167
return disassembled_inst;
247168
}
248169

249-
DisassembledInstruction
250-
DisassembleRequestHandler::GetInvalidInstruction() const {
251-
DisassembledInstruction invalid_inst;
252-
invalid_inst.presentationHint =
253-
DisassembledInstruction::eDisassembledInstructionPresentationHintInvalid;
254-
return invalid_inst;
170+
/// Disassembles code stored at the provided location.
171+
/// Clients should only call this request if the corresponding capability
172+
/// `supportsDisassembleRequest` is true.
173+
llvm::Expected<DisassembleResponseBody>
174+
DisassembleRequestHandler::Run(const DisassembleArguments &args) const {
175+
std::optional<lldb::addr_t> addr_opt =
176+
DecodeMemoryReference(args.memoryReference);
177+
if (!addr_opt.has_value())
178+
return llvm::make_error<DAPError>("Malformed memory reference: " +
179+
args.memoryReference);
180+
181+
lldb::addr_t addr_ptr = *addr_opt;
182+
addr_ptr += args.offset.value_or(0);
183+
lldb::SBAddress addr(addr_ptr, dap.target);
184+
if (!addr.IsValid())
185+
return llvm::make_error<DAPError>(
186+
"Memory reference not found in the current binary.");
187+
188+
std::string flavor_string;
189+
const auto target_triple = llvm::StringRef(dap.target.GetTriple());
190+
// This handles both 32 and 64bit x86 architecture. The logic is duplicated in
191+
// `CommandObjectDisassemble::CommandOptions::OptionParsingStarting`
192+
if (target_triple.starts_with("x86")) {
193+
const lldb::SBStructuredData flavor =
194+
dap.debugger.GetSetting("target.x86-disassembly-flavor");
195+
196+
const size_t str_length = flavor.GetStringValue(nullptr, 0);
197+
if (str_length != 0) {
198+
flavor_string.resize(str_length + 1);
199+
flavor.GetStringValue(flavor_string.data(), flavor_string.length());
200+
}
201+
}
202+
203+
// Offset (in instructions) to be applied after the byte offset (if any)
204+
// before disassembling. Can be negative.
205+
int64_t instruction_offset = args.instructionOffset.value_or(0);
206+
207+
// Calculate a sufficient address to start disassembling from.
208+
lldb::SBAddress disassemble_start_addr =
209+
GetDisassembleStartAddress(dap.target, addr, instruction_offset);
210+
if (!disassemble_start_addr.IsValid())
211+
return llvm::make_error<DAPError>(
212+
"Unexpected error while disassembling instructions.");
213+
214+
lldb::SBInstructionList insts = dap.target.ReadInstructions(
215+
disassemble_start_addr, args.instructionCount, flavor_string.c_str());
216+
if (!insts.IsValid())
217+
return llvm::make_error<DAPError>(
218+
"Unexpected error while disassembling instructions.");
219+
220+
// Convert the found instructions to the DAP format.
221+
const bool resolve_symbols = args.resolveSymbols.value_or(false);
222+
std::vector<DisassembledInstruction> instructions;
223+
size_t original_address_index = args.instructionCount;
224+
for (size_t i = 0; i < insts.GetSize(); ++i) {
225+
lldb::SBInstruction inst = insts.GetInstructionAtIndex(i);
226+
if (inst.GetAddress() == addr)
227+
original_address_index = i;
228+
229+
instructions.push_back(ConvertSBInstructionToDisassembledInstruction(
230+
dap.target, inst, resolve_symbols));
231+
}
232+
233+
// Check if we miss instructions at the beginning.
234+
if (instruction_offset < 0) {
235+
const auto backwards_instructions_count =
236+
static_cast<size_t>(std::abs(instruction_offset));
237+
if (original_address_index < backwards_instructions_count) {
238+
// We don't have enough instructions before the main address as was
239+
// requested. Let's pad the start of the instructions with invalid
240+
// instructions.
241+
std::vector<DisassembledInstruction> invalid_instructions(
242+
backwards_instructions_count - original_address_index,
243+
GetInvalidInstruction());
244+
instructions.insert(instructions.begin(), invalid_instructions.begin(),
245+
invalid_instructions.end());
246+
247+
// Trim excess instructions if needed.
248+
if (instructions.size() > args.instructionCount)
249+
instructions.resize(args.instructionCount);
250+
}
251+
}
252+
253+
// Pad the instructions with invalid instructions if needed.
254+
while (instructions.size() < args.instructionCount) {
255+
instructions.push_back(GetInvalidInstruction());
256+
}
257+
258+
return DisassembleResponseBody{std::move(instructions)};
255259
}
256260

257261
} // namespace lldb_dap

lldb/tools/lldb-dap/Handler/RequestHandler.h

Lines changed: 0 additions & 9 deletions
Original file line numberDiff line numberDiff line change
@@ -15,7 +15,6 @@
1515
#include "Protocol/ProtocolBase.h"
1616
#include "Protocol/ProtocolRequests.h"
1717
#include "Protocol/ProtocolTypes.h"
18-
#include "lldb/API/SBAddress.h"
1918
#include "llvm/ADT/DenseSet.h"
2019
#include "llvm/ADT/StringRef.h"
2120
#include "llvm/Support/Error.h"
@@ -546,14 +545,6 @@ class DisassembleRequestHandler final
546545
}
547546
llvm::Expected<protocol::DisassembleResponseBody>
548547
Run(const protocol::DisassembleArguments &args) const override;
549-
550-
std::vector<protocol::DisassembledInstruction>
551-
disassembleBackwards(lldb::SBAddress &addr, const uint32_t instruction_count,
552-
const char *flavor_string, bool resolve_symbols) const;
553-
protocol::DisassembledInstruction
554-
SBInstructionToDisassembledInstruction(lldb::SBInstruction &inst,
555-
bool resolve_symbols) const;
556-
protocol::DisassembledInstruction GetInvalidInstruction() const;
557548
};
558549

559550
class ReadMemoryRequestHandler : public LegacyRequestHandler {

0 commit comments

Comments
 (0)