Skip to content
84 changes: 84 additions & 0 deletions lldb/examples/python/filter_disasm.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,84 @@
"""
Defines a command, fdis, that does filtered disassembly. The command runs the
lldb disassemble command with -b and any other arguments passed in, and
pipes its output through a configurable filter program.

The intention is to support disassembly of RISC-V proprietary instructions.
This is handled with llvm-objdump by piping the output of llvm-objdump through
a filter program. This script is intended to mimic that workflow.
"""

import lldb
import subprocess

# Name or path of the executable that fdis pipes disassembly through.
# Read by fdis on every invocation and replaced by "fdis set <program>".
filter_program = "crustfilt"


def __lldb_init_module(debugger, dict):
    """Register the fdis command and announce the current filter program.

    debugger: object whose HandleCommand() is used to issue the
        "command script add" registration.
    dict: accepted as part of the signature but not used here.
    """
    registration = "command script add -f filter_disasm.fdis fdis"
    debugger.HandleCommand(registration)

    # Tell the user the command is available and which filter is active.
    print("Disassembly filter command (fdis) loaded")
    active_filter_note = "Filter program set to %s" % filter_program
    print(active_filter_note)


def fdis(debugger, args, exe_ctx, result, dict):
    """
    Call the built in disassembler, then pass its output to a filter program
    to add in disassembly for hidden opcodes.
    Except for get and set, use the fdis command like the disassemble command.
    By default, the filter program is crustfilt, from
    https://github.com/quic/crustfilt . This can be changed by changing
    the global variable filter_program.

    Usage:
      fdis [[get] [set <program>] [<disassembly options>]]

        Choose one of the following:
            get
                Gets the current filter program

            set <program>
                Sets the current filter program. This can be an executable, which
                will be found on PATH, or an absolute path.

            <disassembly options>
                If the first argument is not get or set, the args will be passed
                to the disassemble command as is.

    """

    global filter_program
    # Split on any run of whitespace. Splitting on a single " " produces empty
    # tokens for leading, trailing or repeated spaces, which made inputs such
    # as "get " or "set  prog" miss the get/set branches below and fall
    # through to the disassemble command.
    args_list = args.split()
    result.Clear()

    if args_list == ["get"]:
        result.PutCString(filter_program)
        result.SetStatus(lldb.eReturnStatusSuccessFinishResult)
        return

    if len(args_list) == 2 and args_list[0] == "set":
        filter_program = args_list[1]
        result.PutCString("Filter program set to %s" % filter_program)
        result.SetStatus(lldb.eReturnStatusSuccessFinishResult)
        return

    # Everything else is forwarded unmodified to the built-in disassemble
    # command, with -b prepended as described in the module docstring.
    res = lldb.SBCommandReturnObject()
    debugger.GetCommandInterpreter().HandleCommand(
        "disassemble -b " + args, exe_ctx, res
    )
    error = res.GetError()
    if error:
        # The disassemble command reported an error; pass it on untouched.
        result.SetError(error)
        result.SetStatus(lldb.eReturnStatusFailed)
        return
    output = res.GetOutput()

    try:
        # The program is passed as a one-element argv list (no shell), the
        # disassembly is fed on stdin, and stdout/stderr are captured as text.
        proc = subprocess.run(
            [filter_program], capture_output=True, text=True, input=output
        )
    except (subprocess.SubprocessError, OSError) as e:
        # The filter could not be run (e.g. it was not found); still show the
        # unfiltered disassembly so the user is not left with nothing.
        result.PutCString("Error occurred. Original disassembly:\n\n" + output)
        result.SetError(str(e))
        result.SetStatus(lldb.eReturnStatusFailed)
        return

    if proc.returncode:
        warning = "warning: {} returned non-zero value {}".format(
            filter_program, proc.returncode
        )
        # Surface whatever the filter wrote to stderr instead of silently
        # discarding it, so the reason for the failure is visible.
        if proc.stderr:
            warning += "\n" + proc.stderr.rstrip()
        result.PutCString(warning)

    result.PutCString(proc.stdout)
    result.SetStatus(lldb.eReturnStatusSuccessFinishResult)
39 changes: 32 additions & 7 deletions lldb/include/lldb/Core/Opcode.h
Original file line number Diff line number Diff line change
Expand Up @@ -32,7 +32,10 @@ class Opcode {
eTypeInvalid,
eType8,
eType16,
eType16_2, // a 32-bit Thumb instruction, made up of two words
eType16_2, // a 32-bit Thumb instruction, made up of two words
eType16_32Tuples, // RISC-V that can have 2, 4, 6, 8 etc byte long
// instructions which will be printed in combinations of
// 16 & 32-bit words.
eType32,
eType64,
eTypeBytes
Expand Down Expand Up @@ -60,9 +63,9 @@ class Opcode {
m_data.inst64 = inst;
}

Opcode(uint8_t *bytes, size_t length)
: m_byte_order(lldb::eByteOrderInvalid) {
SetOpcodeBytes(bytes, length);
Opcode(uint8_t *bytes, size_t length, Opcode::Type type,
lldb::ByteOrder order) {
DoSetOpcodeBytes(bytes, length, type, order);
}

void Clear() {
Expand All @@ -82,6 +85,8 @@ class Opcode {
break;
case Opcode::eType16_2:
break;
case Opcode::eType16_32Tuples:
break;
case Opcode::eType32:
break;
case Opcode::eType64:
Expand All @@ -103,6 +108,8 @@ class Opcode {
: m_data.inst16;
case Opcode::eType16_2:
break;
case Opcode::eType16_32Tuples:
break;
case Opcode::eType32:
break;
case Opcode::eType64:
Expand All @@ -122,6 +129,8 @@ class Opcode {
case Opcode::eType16:
return GetEndianSwap() ? llvm::byteswap<uint16_t>(m_data.inst16)
: m_data.inst16;
case Opcode::eType16_32Tuples:
break;
case Opcode::eType16_2: // passthrough
case Opcode::eType32:
return GetEndianSwap() ? llvm::byteswap<uint32_t>(m_data.inst32)
Expand All @@ -143,6 +152,8 @@ class Opcode {
case Opcode::eType16:
return GetEndianSwap() ? llvm::byteswap<uint16_t>(m_data.inst16)
: m_data.inst16;
case Opcode::eType16_32Tuples:
break;
case Opcode::eType16_2: // passthrough
case Opcode::eType32:
return GetEndianSwap() ? llvm::byteswap<uint32_t>(m_data.inst32)
Expand Down Expand Up @@ -186,20 +197,30 @@ class Opcode {
m_byte_order = order;
}

void SetOpcode16_32TupleBytes(const void *bytes, size_t length,
lldb::ByteOrder order) {
DoSetOpcodeBytes(bytes, length, eType16_32Tuples, order);
}

void SetOpcodeBytes(const void *bytes, size_t length) {
DoSetOpcodeBytes(bytes, length, eTypeBytes, lldb::eByteOrderInvalid);
}

void DoSetOpcodeBytes(const void *bytes, size_t length, Opcode::Type type,
lldb::ByteOrder order) {
if (bytes != nullptr && length > 0) {
m_type = eTypeBytes;
m_type = type;
m_data.inst.length = length;
assert(length < sizeof(m_data.inst.bytes));
memcpy(m_data.inst.bytes, bytes, length);
m_byte_order = lldb::eByteOrderInvalid;
m_byte_order = order;
} else {
m_type = eTypeInvalid;
m_data.inst.length = 0;
}
}

int Dump(Stream *s, uint32_t min_byte_width);
int Dump(Stream *s, uint32_t min_byte_width) const;

const void *GetOpcodeBytes() const {
return ((m_type == Opcode::eTypeBytes) ? m_data.inst.bytes : nullptr);
Expand All @@ -213,6 +234,8 @@ class Opcode {
return sizeof(m_data.inst8);
case Opcode::eType16:
return sizeof(m_data.inst16);
case Opcode::eType16_32Tuples:
return m_data.inst.length;
case Opcode::eType16_2: // passthrough
case Opcode::eType32:
return sizeof(m_data.inst32);
Expand All @@ -238,6 +261,8 @@ class Opcode {
return &m_data.inst8;
case Opcode::eType16:
return &m_data.inst16;
case Opcode::eType16_32Tuples:
return m_data.inst.bytes;
case Opcode::eType16_2: // passthrough
case Opcode::eType32:
return &m_data.inst32;
Expand Down
6 changes: 4 additions & 2 deletions lldb/source/Core/Disassembler.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -685,10 +685,12 @@ void Instruction::Dump(lldb_private::Stream *s, uint32_t max_opcode_byte_size,
}
}
const size_t opcode_pos = ss.GetSizeOfLastLine();
const std::string &opcode_name =
show_color ? m_markup_opcode_name : m_opcode_name;
std::string &opcode_name = show_color ? m_markup_opcode_name : m_opcode_name;
const std::string &mnemonics = show_color ? m_markup_mnemonics : m_mnemonics;

if (opcode_name.empty())
opcode_name = "<unknown>";

// The default opcode size of 7 characters is plenty for most architectures
// but some like arm can pull out the occasional vqrshrun.s16. We won't get
// consistent column spacing in these cases, unfortunately. Also note that we
Expand Down
27 changes: 26 additions & 1 deletion lldb/source/Core/Opcode.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -21,7 +21,7 @@
using namespace lldb;
using namespace lldb_private;

int Opcode::Dump(Stream *s, uint32_t min_byte_width) {
int Opcode::Dump(Stream *s, uint32_t min_byte_width) const {
const uint32_t previous_bytes = s->GetWrittenBytes();
switch (m_type) {
case Opcode::eTypeInvalid:
Expand All @@ -38,6 +38,27 @@ int Opcode::Dump(Stream *s, uint32_t min_byte_width) {
s->Printf("0x%8.8x", m_data.inst32);
break;

case Opcode::eType16_32Tuples: {
const bool format_as_words = (m_data.inst.length % 4) == 0;
uint32_t i = 0;
while (i < m_data.inst.length) {
if (i > 0)
s->PutChar(' ');
if (format_as_words) {
// Format as words; print 1 or more UInt32 values.
s->Printf("%2.2x%2.2x%2.2x%2.2x", m_data.inst.bytes[i + 3],
m_data.inst.bytes[i + 2], m_data.inst.bytes[i + 1],
m_data.inst.bytes[i + 0]);
i += 4;
} else {
// Format as halfwords; print 1 or more UInt16 values.
s->Printf("%2.2x%2.2x", m_data.inst.bytes[i + 1],
m_data.inst.bytes[i + 0]);
i += 2;
}
}
} break;

case Opcode::eType64:
s->Printf("0x%16.16" PRIx64, m_data.inst64);
break;
Expand Down Expand Up @@ -69,6 +90,7 @@ lldb::ByteOrder Opcode::GetDataByteOrder() const {
case Opcode::eType8:
case Opcode::eType16:
case Opcode::eType16_2:
case Opcode::eType16_32Tuples:
case Opcode::eType32:
case Opcode::eType64:
return endian::InlHostByteOrder();
Expand Down Expand Up @@ -113,6 +135,9 @@ uint32_t Opcode::GetData(DataExtractor &data) const {
swap_buf[3] = m_data.inst.bytes[2];
buf = swap_buf;
break;
case Opcode::eType16_32Tuples:
buf = GetOpcodeDataBytes();
break;
case Opcode::eType32:
*(uint32_t *)swap_buf = llvm::byteswap<uint32_t>(m_data.inst32);
buf = swap_buf;
Expand Down
57 changes: 34 additions & 23 deletions lldb/source/Plugins/Disassembler/LLVMC/DisassemblerLLVMC.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -61,6 +61,8 @@ class DisassemblerLLVMC::MCDisasmInstance {

uint64_t GetMCInst(const uint8_t *opcode_data, size_t opcode_data_len,
lldb::addr_t pc, llvm::MCInst &mc_inst) const;
bool GetMCInst(const uint8_t *opcode_data, size_t opcode_data_len,
lldb::addr_t pc, llvm::MCInst &mc_inst, size_t &size) const;
void PrintMCInst(llvm::MCInst &mc_inst, lldb::addr_t pc,
std::string &inst_string, std::string &comments_string);
void SetStyle(bool use_hex_immed, HexImmediateStyle hex_style);
Expand Down Expand Up @@ -486,8 +488,13 @@ class InstructionLLVMC : public lldb_private::Instruction {
break;

default:
m_opcode.SetOpcodeBytes(data.PeekData(data_offset, min_op_byte_size),
min_op_byte_size);
if (arch.GetTriple().isRISCV())
m_opcode.SetOpcode16_32TupleBytes(
data.PeekData(data_offset, min_op_byte_size), min_op_byte_size,
byte_order);
else
m_opcode.SetOpcodeBytes(
data.PeekData(data_offset, min_op_byte_size), min_op_byte_size);
got_op = true;
break;
}
Expand Down Expand Up @@ -524,13 +531,16 @@ class InstructionLLVMC : public lldb_private::Instruction {
const addr_t pc = m_address.GetFileAddress();
llvm::MCInst inst;

const size_t inst_size =
mc_disasm_ptr->GetMCInst(opcode_data, opcode_data_len, pc, inst);
if (inst_size == 0)
m_opcode.Clear();
else {
m_opcode.SetOpcodeBytes(opcode_data, inst_size);
m_is_valid = true;
size_t inst_size = 0;
m_is_valid = mc_disasm_ptr->GetMCInst(opcode_data, opcode_data_len,
pc, inst, inst_size);
m_opcode.Clear();
if (inst_size != 0) {
if (arch.GetTriple().isRISCV())
m_opcode.SetOpcode16_32TupleBytes(opcode_data, inst_size,
byte_order);
else
m_opcode.SetOpcodeBytes(opcode_data, inst_size);
}
}
}
Expand Down Expand Up @@ -604,10 +614,11 @@ class InstructionLLVMC : public lldb_private::Instruction {
const uint8_t *opcode_data = data.GetDataStart();
const size_t opcode_data_len = data.GetByteSize();
llvm::MCInst inst;
size_t inst_size =
mc_disasm_ptr->GetMCInst(opcode_data, opcode_data_len, pc, inst);
size_t inst_size = 0;
bool valid = mc_disasm_ptr->GetMCInst(opcode_data, opcode_data_len, pc,
inst, inst_size);

if (inst_size > 0) {
if (valid && inst_size > 0) {
mc_disasm_ptr->SetStyle(use_hex_immediates, hex_style);

const bool saved_use_color = mc_disasm_ptr->GetUseColor();
Expand Down Expand Up @@ -1206,9 +1217,10 @@ class InstructionLLVMC : public lldb_private::Instruction {
const uint8_t *opcode_data = data.GetDataStart();
const size_t opcode_data_len = data.GetByteSize();
llvm::MCInst inst;
const size_t inst_size =
mc_disasm_ptr->GetMCInst(opcode_data, opcode_data_len, pc, inst);
if (inst_size == 0)
size_t inst_size = 0;
const bool valid = mc_disasm_ptr->GetMCInst(opcode_data, opcode_data_len,
pc, inst, inst_size);
if (!valid)
return;

m_has_visited_instruction = true;
Expand Down Expand Up @@ -1337,19 +1349,18 @@ DisassemblerLLVMC::MCDisasmInstance::MCDisasmInstance(
m_asm_info_up && m_context_up && m_disasm_up && m_instr_printer_up);
}

uint64_t DisassemblerLLVMC::MCDisasmInstance::GetMCInst(
const uint8_t *opcode_data, size_t opcode_data_len, lldb::addr_t pc,
llvm::MCInst &mc_inst) const {
bool DisassemblerLLVMC::MCDisasmInstance::GetMCInst(const uint8_t *opcode_data, size_t opcode_data_len,
lldb::addr_t pc,
llvm::MCInst &mc_inst,
size_t &size) const {
llvm::ArrayRef<uint8_t> data(opcode_data, opcode_data_len);
llvm::MCDisassembler::DecodeStatus status;

uint64_t new_inst_size;
status = m_disasm_up->getInstruction(mc_inst, new_inst_size, data, pc,
llvm::nulls());
status = m_disasm_up->getInstruction(mc_inst, size, data, pc, llvm::nulls());
if (status == llvm::MCDisassembler::Success)
return new_inst_size;
return true;
else
return 0;
return false;
}

void DisassemblerLLVMC::MCDisasmInstance::PrintMCInst(
Expand Down
4 changes: 2 additions & 2 deletions lldb/source/Utility/ArchSpec.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -228,9 +228,9 @@ static const CoreDefinition g_core_definitions[] = {
{eByteOrderLittle, 4, 4, 4, llvm::Triple::hexagon,
ArchSpec::eCore_hexagon_hexagonv5, "hexagonv5"},

{eByteOrderLittle, 4, 2, 4, llvm::Triple::riscv32, ArchSpec::eCore_riscv32,
{eByteOrderLittle, 4, 2, 8, llvm::Triple::riscv32, ArchSpec::eCore_riscv32,
"riscv32"},
{eByteOrderLittle, 8, 2, 4, llvm::Triple::riscv64, ArchSpec::eCore_riscv64,
{eByteOrderLittle, 8, 2, 8, llvm::Triple::riscv64, ArchSpec::eCore_riscv64,
"riscv64"},

{eByteOrderLittle, 4, 4, 4, llvm::Triple::loongarch32,
Expand Down
8 changes: 8 additions & 0 deletions lldb/test/Shell/Commands/Inputs/dis_filt.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,8 @@
#! /usr/bin/env python3
"""Copy stdin to stdout, relabelling one specific "<unknown>" line.

A line is rewritten only when it contains both the byte pattern
"0940003f 00200020" and the text "<unknown>"; in that case every
"<unknown>" on the line becomes "Fake64". All other lines pass through
unchanged.
"""

import sys

for text in sys.stdin:
    needs_rewrite = "<unknown>" in text and "0940003f 00200020" in text
    # Lines already carry their own newline, so write them back verbatim.
    sys.stdout.write(text.replace("<unknown>", "Fake64") if needs_rewrite else text)
Loading
Loading