Skip to content
87 changes: 87 additions & 0 deletions lldb/examples/python/filter_disasm.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,87 @@
"""
Defines a command, fdis, that does filtered disassembly. The command does the
lldb disassemble command with -b and any other arguments passed in, and
pipes that through a provided filter program.
The intention is to support disassembly of RISC-V proprietary instructions.
This is handled with llvm-objdump by piping the output of llvm-objdump through
a filter program. This script is intended to mimic that workflow.
"""

import lldb
import subprocess

filter_program = "crustfilt"

def __lldb_init_module(debugger, dict):
debugger.HandleCommand(
'command script add -f filter_disasm.fdis fdis')
print("Disassembly filter command (fdis) loaded")
print("Filter program set to %s" % filter_program)


def fdis(debugger, args, result, dict):
"""
Call the built in disassembler, then pass its output to a filter program
to add in disassembly for hidden opcodes.
Except for get and set, use the fdis command like the disassemble command.
By default, the filter program is crustfilt, from
https://github.com/quic/crustfilt . This can be changed by changing
the global variable filter_program.
Usage:
fdis [[get] [set <program>] [<disassembly options>]]
Choose one of the following:
get
Gets the current filter program
set <program>
Sets the current filter program. This can be an executable, which
will be found on PATH, or an absolute path.
<disassembly options>
If the first argument is not get or set, the args will be passed
to the disassemble command as is.
"""

global filter_program
args_list = args.split(' ')
result.Clear()

if len(args_list) == 1 and args_list[0] == 'get':
result.PutCString(filter_program)
result.SetStatus(lldb.eReturnStatusSuccessFinishResult)
return

if len(args_list) == 2 and args_list[0] == 'set':
filter_program = args_list[1]
result.PutCString("Filter program set to %s" % filter_program)
result.SetStatus(lldb.eReturnStatusSuccessFinishResult)
return

res = lldb.SBCommandReturnObject()
debugger.GetCommandInterpreter().HandleCommand('disassemble -b ' + args, res)
if (len(res.GetError()) > 0):
result.SetError(res.GetError())
result.SetStatus(lldb.eReturnStatusFailed)
return
output = res.GetOutput()

try:
proc = subprocess.run([filter_program], capture_output=True, text=True, input=output)
except (subprocess.SubprocessError, OSError) as e:
result.PutCString("Error occurred. Original disassembly:\n\n" + output)
result.SetError(str(e))
result.SetStatus(lldb.eReturnStatusFailed)
return

print(proc.stderr)
if proc.stderr:
pass
#result.SetError(proc.stderr)
#result.SetStatus(lldb.eReturnStatusFailed)
Copy link
Collaborator

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

I presume this needs to be swapped. Put stderr in the error so that the user will see it.

Copy link
Contributor Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

I'm not sure the presence of data on stderr means a failure. I'll look into error handling more.

Copy link
Collaborator

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

It looks like python's subprocess.run returns a subprocess.CompletedProcess object which has a returncode ivar, and the usual convention in Unix is 0 means success. I wouldn't treat the presence of stderr as indicative of an error by itself.

else:
result.PutCString(proc.stdout)
result.SetStatus(lldb.eReturnStatusSuccessFinishResult)
1 change: 1 addition & 0 deletions lldb/include/lldb/Core/Opcode.h
Original file line number Diff line number Diff line change
Expand Up @@ -200,6 +200,7 @@ class Opcode {
}

int Dump(Stream *s, uint32_t min_byte_width);
int DumpRISCV(Stream *s, uint32_t min_byte_width);

const void *GetOpcodeBytes() const {
return ((m_type == Opcode::eTypeBytes) ? m_data.inst.bytes : nullptr);
Expand Down
14 changes: 11 additions & 3 deletions lldb/source/Core/Disassembler.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -658,8 +658,13 @@ void Instruction::Dump(lldb_private::Stream *s, uint32_t max_opcode_byte_size,
// the byte dump to be able to always show 15 bytes (3 chars each) plus a
// space
if (max_opcode_byte_size > 0)
m_opcode.Dump(&ss, max_opcode_byte_size * 3 + 1);
else
// make RISC-V opcode dump look like llvm-objdump
if (exe_ctx &&
exe_ctx->GetTargetSP()->GetArchitecture().GetTriple().isRISCV())
m_opcode.DumpRISCV(&ss, max_opcode_byte_size * 3 + 1);
Copy link
Collaborator

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Pull the minimum size out into:

auto min_byte_width = max_opcode_byte_size * 3 + 1;

Then use it in both calls, so it's clear they use the same minimum.

else
m_opcode.Dump(&ss, max_opcode_byte_size * 3 + 1);
else
m_opcode.Dump(&ss, 15 * 3 + 1);
} else {
// Else, we have ARM or MIPS which can show up to a uint32_t 0x00000000
Expand All @@ -685,10 +690,13 @@ void Instruction::Dump(lldb_private::Stream *s, uint32_t max_opcode_byte_size,
}
}
const size_t opcode_pos = ss.GetSizeOfLastLine();
const std::string &opcode_name =
std::string &opcode_name =
show_color ? m_markup_opcode_name : m_opcode_name;
const std::string &mnemonics = show_color ? m_markup_mnemonics : m_mnemonics;

if (opcode_name.empty())
opcode_name = "<unknown>";

// The default opcode size of 7 characters is plenty for most architectures
// but some like arm can pull out the occasional vqrshrun.s16. We won't get
// consistent column spacing in these cases, unfortunately. Also note that we
Expand Down
38 changes: 38 additions & 0 deletions lldb/source/Core/Opcode.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -78,6 +78,44 @@ lldb::ByteOrder Opcode::GetDataByteOrder() const {
return eByteOrderInvalid;
}

// make RISC-V byte dumps look like llvm-objdump, instead of just dumping bytes
int Opcode::DumpRISCV(Stream *s, uint32_t min_byte_width) {
const uint32_t previous_bytes = s->GetWrittenBytes();
// if m_type is not bytes, call Dump
if (m_type != Opcode::eTypeBytes)
return Dump(s, min_byte_width);

// from RISCVPrettyPrinter in llvm-objdump.cpp
// if size % 4 == 0, print as 1 or 2 32 bit values (32 or 64 bit inst)
// else if size % 2 == 0, print as 1 or 3 16 bit values (16 or 48 bit inst)
// else fall back and print bytes
Copy link
Collaborator

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

I would put these comments next to the ifs they refer to, I have to read the code anyway so it makes more sense to me to have the commentary interleaved.

for (uint32_t i = 0; i < m_data.inst.length;) {
if (i > 0)
s->PutChar(' ');
if (!(m_data.inst.length % 4)) {
s->Printf("%2.2x%2.2x%2.2x%2.2x", m_data.inst.bytes[i + 3],
m_data.inst.bytes[i + 2],
m_data.inst.bytes[i + 1],
m_data.inst.bytes[i + 0]);
i += 4;
} else if (!(m_data.inst.length % 2)) {
s->Printf("%2.2x%2.2x", m_data.inst.bytes[i + 1],
m_data.inst.bytes[i + 0]);
i += 2;
} else {
s->Printf("%2.2x", m_data.inst.bytes[i]);
++i;
}
}

uint32_t bytes_written_so_far = s->GetWrittenBytes() - previous_bytes;
// Add spaces to make sure bytes display comes out even in case opcodes aren't
// all the same size.
if (bytes_written_so_far < min_byte_width)
s->Printf("%*s", min_byte_width - bytes_written_so_far, "");
return s->GetWrittenBytes() - previous_bytes;
}

uint32_t Opcode::GetData(DataExtractor &data) const {
uint32_t byte_size = GetByteSize();
uint8_t swap_buf[8];
Expand Down
39 changes: 21 additions & 18 deletions lldb/source/Plugins/Disassembler/LLVMC/DisassemblerLLVMC.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -61,6 +61,8 @@ class DisassemblerLLVMC::MCDisasmInstance {

uint64_t GetMCInst(const uint8_t *opcode_data, size_t opcode_data_len,
lldb::addr_t pc, llvm::MCInst &mc_inst) const;
bool GetMCInst(const uint8_t *opcode_data, size_t opcode_data_len,
lldb::addr_t pc, llvm::MCInst &mc_inst, size_t &size) const;
void PrintMCInst(llvm::MCInst &mc_inst, lldb::addr_t pc,
std::string &inst_string, std::string &comments_string);
void SetStyle(bool use_hex_immed, HexImmediateStyle hex_style);
Expand Down Expand Up @@ -524,11 +526,11 @@ class InstructionLLVMC : public lldb_private::Instruction {
const addr_t pc = m_address.GetFileAddress();
llvm::MCInst inst;

const size_t inst_size =
mc_disasm_ptr->GetMCInst(opcode_data, opcode_data_len, pc, inst);
if (inst_size == 0)
m_opcode.Clear();
else {
size_t inst_size = 0;
m_is_valid = mc_disasm_ptr->GetMCInst(opcode_data, opcode_data_len,
pc, inst, inst_size);
m_opcode.Clear();
if (inst_size != 0) {
m_opcode.SetOpcodeBytes(opcode_data, inst_size);
m_is_valid = true;
}
Expand Down Expand Up @@ -604,10 +606,11 @@ class InstructionLLVMC : public lldb_private::Instruction {
const uint8_t *opcode_data = data.GetDataStart();
const size_t opcode_data_len = data.GetByteSize();
llvm::MCInst inst;
size_t inst_size =
mc_disasm_ptr->GetMCInst(opcode_data, opcode_data_len, pc, inst);

if (inst_size > 0) {
size_t inst_size = 0;
bool valid = mc_disasm_ptr->GetMCInst(opcode_data, opcode_data_len, pc,
inst, inst_size);

if (valid && inst_size > 0) {
mc_disasm_ptr->SetStyle(use_hex_immediates, hex_style);

const bool saved_use_color = mc_disasm_ptr->GetUseColor();
Expand Down Expand Up @@ -1206,9 +1209,10 @@ class InstructionLLVMC : public lldb_private::Instruction {
const uint8_t *opcode_data = data.GetDataStart();
const size_t opcode_data_len = data.GetByteSize();
llvm::MCInst inst;
const size_t inst_size =
mc_disasm_ptr->GetMCInst(opcode_data, opcode_data_len, pc, inst);
if (inst_size == 0)
size_t inst_size = 0;
const bool valid = mc_disasm_ptr->GetMCInst(opcode_data, opcode_data_len,
pc, inst, inst_size);
if (!valid)
return;

m_has_visited_instruction = true;
Expand Down Expand Up @@ -1337,19 +1341,18 @@ DisassemblerLLVMC::MCDisasmInstance::MCDisasmInstance(
m_asm_info_up && m_context_up && m_disasm_up && m_instr_printer_up);
}

uint64_t DisassemblerLLVMC::MCDisasmInstance::GetMCInst(
bool DisassemblerLLVMC::MCDisasmInstance::GetMCInst(
const uint8_t *opcode_data, size_t opcode_data_len, lldb::addr_t pc,
llvm::MCInst &mc_inst) const {
llvm::MCInst &mc_inst, size_t &size) const {
llvm::ArrayRef<uint8_t> data(opcode_data, opcode_data_len);
llvm::MCDisassembler::DecodeStatus status;

uint64_t new_inst_size;
status = m_disasm_up->getInstruction(mc_inst, new_inst_size, data, pc,
status = m_disasm_up->getInstruction(mc_inst, size, data, pc,
llvm::nulls());
if (status == llvm::MCDisassembler::Success)
return new_inst_size;
return true;
else
return 0;
return false;
}

void DisassemblerLLVMC::MCDisasmInstance::PrintMCInst(
Expand Down
4 changes: 2 additions & 2 deletions lldb/source/Utility/ArchSpec.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -228,9 +228,9 @@ static const CoreDefinition g_core_definitions[] = {
{eByteOrderLittle, 4, 4, 4, llvm::Triple::hexagon,
ArchSpec::eCore_hexagon_hexagonv5, "hexagonv5"},

{eByteOrderLittle, 4, 2, 4, llvm::Triple::riscv32, ArchSpec::eCore_riscv32,
{eByteOrderLittle, 4, 2, 8, llvm::Triple::riscv32, ArchSpec::eCore_riscv32,
"riscv32"},
{eByteOrderLittle, 8, 2, 4, llvm::Triple::riscv64, ArchSpec::eCore_riscv64,
{eByteOrderLittle, 8, 2, 8, llvm::Triple::riscv64, ArchSpec::eCore_riscv64,
"riscv64"},

{eByteOrderLittle, 4, 4, 4, llvm::Triple::loongarch32,
Expand Down
Loading