llvm · tedwoodward · Jul 15, 2025 · Jun 25, 2025 · Jun 25, 2025 · Jun 25, 2025
@@ -0,0 +1,84 @@
+"""
+Defines a command, fdis, that does filtered disassembly. The command does the
+lldb disassemble command with -b and any other arguments passed in, and
+pipes that through a provided filter program.
+
+The intention is to support disassembly of RISC-V proprietary instructions.
+This is handled with llvm-objdump by piping the output of llvm-objdump through
+a filter program. This script is intended to mimic that workflow.
+"""
+
+import lldb
+import subprocess
+
+filter_program = "crustfilt"
+
+
+def __lldb_init_module(debugger, dict):
+    debugger.HandleCommand("command script add -f filter_disasm.fdis fdis")
+    print("Disassembly filter command (fdis) loaded")
+    print("Filter program set to %s" % filter_program)
+
+
+def fdis(debugger, args, exe_ctx, result, dict):
+    """
+  Call the built in disassembler, then pass its output to a filter program
+  to add in disassembly for hidden opcodes.
+  Except for get and set, use the fdis command like the disassemble command.
+  By default, the filter program is crustfilt, from
+  https://github.com/quic/crustfilt . This can be changed by changing
+  the global variable filter_program.
+
+  Usage:
+    fdis [[get] [set <program>] [<disassembly options>]]
+
+    Choose one of the following:
+        get
+            Gets the current filter program
+
+        set <program>
+            Sets the current filter program. This can be an executable, which
+            will be found on PATH, or an absolute path.
+
+        <disassembly options>
+            If the first argument is not get or set, the args will be passed
+            to the disassemble command as is.
+
+    """
+
+    global filter_program
+    args_list = args.split(" ")
+    result.Clear()
+
+    if len(args_list) == 1 and args_list[0] == "get":
+        result.PutCString(filter_program)
+        result.SetStatus(lldb.eReturnStatusSuccessFinishResult)
+        return
+
+    if len(args_list) == 2 and args_list[0] == "set":
+        filter_program = args_list[1]
+        result.PutCString("Filter program set to %s" % filter_program)
+        result.SetStatus(lldb.eReturnStatusSuccessFinishResult)
+        return
+
+    res = lldb.SBCommandReturnObject()
+    debugger.GetCommandInterpreter().HandleCommand("disassemble -b " + args, exe_ctx, res)
+    if len(res.GetError()) > 0:
+        result.SetError(res.GetError())
+        result.SetStatus(lldb.eReturnStatusFailed)
+        return
+    output = res.GetOutput()
+
+    try:
+        proc = subprocess.run([filter_program], capture_output=True, text=True, input=output)
+    except (subprocess.SubprocessError, OSError) as e:
+        result.PutCString("Error occurred. Original disassembly:\n\n" + output)
+        result.SetError(str(e))
+        result.SetStatus(lldb.eReturnStatusFailed)
+        return
+
+    if proc.returncode:
+        result.PutCString("warning: {} returned non-zero value {}".format(filter_program, proc.returncode))
+
+    result.PutCString(proc.stdout)
+    result.SetStatus(lldb.eReturnStatusSuccessFinishResult)
@@ -32,7 +32,10 @@ class Opcode {
     eTypeInvalid,
     eType8,
     eType16,
-    eType16_2, // a 32-bit Thumb instruction, made up of two words
+    eType16_2,        // a 32-bit Thumb instruction, made up of two words
+    eType16_32Tuples, // RISC-V that can have 2, 4, 6, 8 etc byte long
+                      // instructions which will be printed in combinations of
+                      // 16 & 32-bit words.
     eType32,
     eType64,
     eTypeBytes
@@ -60,9 +63,9 @@ class Opcode {
     m_data.inst64 = inst;
   }
 
-  Opcode(uint8_t *bytes, size_t length)
-      : m_byte_order(lldb::eByteOrderInvalid) {
-    SetOpcodeBytes(bytes, length);
+  Opcode(uint8_t *bytes, size_t length, Opcode::Type type,
+         lldb::ByteOrder order) {
+    DoSetOpcodeBytes(bytes, length, type, order);
   }
 
   void Clear() {
@@ -82,6 +85,8 @@ class Opcode {
       break;
     case Opcode::eType16_2:
       break;
+    case Opcode::eType16_32Tuples:
+      break;
     case Opcode::eType32:
       break;
     case Opcode::eType64:
@@ -103,6 +108,8 @@ class Opcode {
                              : m_data.inst16;
     case Opcode::eType16_2:
       break;
+    case Opcode::eType16_32Tuples:
+      break;
     case Opcode::eType32:
       break;
     case Opcode::eType64:
@@ -122,6 +129,8 @@ class Opcode {
     case Opcode::eType16:
       return GetEndianSwap() ? llvm::byteswap<uint16_t>(m_data.inst16)
                              : m_data.inst16;
+    case Opcode::eType16_32Tuples:
+      break;
     case Opcode::eType16_2: // passthrough
     case Opcode::eType32:
       return GetEndianSwap() ? llvm::byteswap<uint32_t>(m_data.inst32)
@@ -143,6 +152,8 @@ class Opcode {
     case Opcode::eType16:
       return GetEndianSwap() ? llvm::byteswap<uint16_t>(m_data.inst16)
                              : m_data.inst16;
+    case Opcode::eType16_32Tuples:
+      break;
     case Opcode::eType16_2: // passthrough
     case Opcode::eType32:
       return GetEndianSwap() ? llvm::byteswap<uint32_t>(m_data.inst32)
@@ -186,20 +197,30 @@ class Opcode {
     m_byte_order = order;
   }
 
+  void SetOpcode16_32TupleBytes(const void *bytes, size_t length,
+                                lldb::ByteOrder order) {
+    DoSetOpcodeBytes(bytes, length, eType16_32Tuples, order);
+  }
+
   void SetOpcodeBytes(const void *bytes, size_t length) {
+    DoSetOpcodeBytes(bytes, length, eTypeBytes, lldb::eByteOrderInvalid);
+  }
+
+  void DoSetOpcodeBytes(const void *bytes, size_t length, Opcode::Type type,
+                        lldb::ByteOrder order) {
     if (bytes != nullptr && length > 0) {
-      m_type = eTypeBytes;
+      m_type = type;
       m_data.inst.length = length;
       assert(length < sizeof(m_data.inst.bytes));
       memcpy(m_data.inst.bytes, bytes, length);
-      m_byte_order = lldb::eByteOrderInvalid;
+      m_byte_order = order;
     } else {
       m_type = eTypeInvalid;
       m_data.inst.length = 0;
     }
   }
 
-  int Dump(Stream *s, uint32_t min_byte_width);
+  int Dump(Stream *s, uint32_t min_byte_width) const;
 
   const void *GetOpcodeBytes() const {
     return ((m_type == Opcode::eTypeBytes) ? m_data.inst.bytes : nullptr);
@@ -213,6 +234,8 @@ class Opcode {
       return sizeof(m_data.inst8);
     case Opcode::eType16:
       return sizeof(m_data.inst16);
+    case Opcode::eType16_32Tuples:
+      return m_data.inst.length;
     case Opcode::eType16_2: // passthrough
     case Opcode::eType32:
       return sizeof(m_data.inst32);
@@ -238,6 +261,8 @@ class Opcode {
       return &m_data.inst8;
     case Opcode::eType16:
       return &m_data.inst16;
+    case Opcode::eType16_32Tuples:
+      return m_data.inst.bytes;
     case Opcode::eType16_2: // passthrough
     case Opcode::eType32:
       return &m_data.inst32;

@@ -685,10 +685,12 @@ void Instruction::Dump(lldb_private::Stream *s, uint32_t max_opcode_byte_size,
     }
   }
   const size_t opcode_pos = ss.GetSizeOfLastLine();
-  const std::string &opcode_name =
-      show_color ? m_markup_opcode_name : m_opcode_name;
+  std::string &opcode_name = show_color ? m_markup_opcode_name : m_opcode_name;
   const std::string &mnemonics = show_color ? m_markup_mnemonics : m_mnemonics;
 
+  if (opcode_name.empty())
+    opcode_name = "<unknown>";
+
   // The default opcode size of 7 characters is plenty for most architectures
   // but some like arm can pull out the occasional vqrshrun.s16.  We won't get
   // consistent column spacing in these cases, unfortunately. Also note that we

@@ -21,7 +21,7 @@
 using namespace lldb;
 using namespace lldb_private;
 
-int Opcode::Dump(Stream *s, uint32_t min_byte_width) {
+int Opcode::Dump(Stream *s, uint32_t min_byte_width) const {
   const uint32_t previous_bytes = s->GetWrittenBytes();
   switch (m_type) {
   case Opcode::eTypeInvalid:
@@ -38,6 +38,27 @@ int Opcode::Dump(Stream *s, uint32_t min_byte_width) {
     s->Printf("0x%8.8x", m_data.inst32);
     break;
 
+  case Opcode::eType16_32Tuples: {
+    const bool format_as_words = (m_data.inst.length % 4) == 0;
+    uint32_t i = 0;
+    while (i < m_data.inst.length) {
+      if (i > 0)
+        s->PutChar(' ');
+      if (format_as_words) {
+        // Format as words; print 1 or more UInt32 values.
+        s->Printf("%2.2x%2.2x%2.2x%2.2x", m_data.inst.bytes[i + 3],
+                  m_data.inst.bytes[i + 2], m_data.inst.bytes[i + 1],
+                  m_data.inst.bytes[i + 0]);
+        i += 4;
+      } else {
+        // Format as halfwords; print 1 or more UInt16 values.
+        s->Printf("%2.2x%2.2x", m_data.inst.bytes[i + 1],
+                  m_data.inst.bytes[i + 0]);
+        i += 2;
+      }
+    }
+  } break;
+
   case Opcode::eType64:
     s->Printf("0x%16.16" PRIx64, m_data.inst64);
     break;
@@ -69,6 +90,7 @@ lldb::ByteOrder Opcode::GetDataByteOrder() const {
   case Opcode::eType8:
   case Opcode::eType16:
   case Opcode::eType16_2:
+  case Opcode::eType16_32Tuples:
   case Opcode::eType32:
   case Opcode::eType64:
     return endian::InlHostByteOrder();
@@ -113,6 +135,9 @@ uint32_t Opcode::GetData(DataExtractor &data) const {
         swap_buf[3] = m_data.inst.bytes[2];
         buf = swap_buf;
         break;
+      case Opcode::eType16_32Tuples:
+        buf = GetOpcodeDataBytes();
+        break;
       case Opcode::eType32:
         *(uint32_t *)swap_buf = llvm::byteswap<uint32_t>(m_data.inst32);
         buf = swap_buf;

@@ -61,6 +61,8 @@ class DisassemblerLLVMC::MCDisasmInstance {
 
   uint64_t GetMCInst(const uint8_t *opcode_data, size_t opcode_data_len,
                      lldb::addr_t pc, llvm::MCInst &mc_inst) const;
+  bool GetMCInst(const uint8_t *opcode_data, size_t opcode_data_len,
+                 lldb::addr_t pc, llvm::MCInst &mc_inst, size_t &size) const;
   void PrintMCInst(llvm::MCInst &mc_inst, lldb::addr_t pc,
                    std::string &inst_string, std::string &comments_string);
   void SetStyle(bool use_hex_immed, HexImmediateStyle hex_style);
@@ -486,8 +488,13 @@ class InstructionLLVMC : public lldb_private::Instruction {
           break;
 
         default:
-          m_opcode.SetOpcodeBytes(data.PeekData(data_offset, min_op_byte_size),
-                                  min_op_byte_size);
+          if (arch.GetTriple().isRISCV())
+            m_opcode.SetOpcode16_32TupleBytes(
+                data.PeekData(data_offset, min_op_byte_size), min_op_byte_size,
+                byte_order);
+          else
+            m_opcode.SetOpcodeBytes(
+                data.PeekData(data_offset, min_op_byte_size), min_op_byte_size);
           got_op = true;
           break;
         }
@@ -524,13 +531,16 @@ class InstructionLLVMC : public lldb_private::Instruction {
           const addr_t pc = m_address.GetFileAddress();
           llvm::MCInst inst;
 
-          const size_t inst_size =
-              mc_disasm_ptr->GetMCInst(opcode_data, opcode_data_len, pc, inst);
-          if (inst_size == 0)
-            m_opcode.Clear();
-          else {
-            m_opcode.SetOpcodeBytes(opcode_data, inst_size);
-            m_is_valid = true;
+          size_t inst_size = 0;
+          m_is_valid = mc_disasm_ptr->GetMCInst(opcode_data, opcode_data_len,
+                                                pc, inst, inst_size);
+          m_opcode.Clear();
+          if (inst_size != 0) {
+            if (arch.GetTriple().isRISCV())
+              m_opcode.SetOpcode16_32TupleBytes(opcode_data, inst_size,
+                                                byte_order);
+            else
+              m_opcode.SetOpcodeBytes(opcode_data, inst_size);
           }
         }
       }
@@ -604,10 +614,11 @@ class InstructionLLVMC : public lldb_private::Instruction {
         const uint8_t *opcode_data = data.GetDataStart();
         const size_t opcode_data_len = data.GetByteSize();
         llvm::MCInst inst;
-        size_t inst_size =
-            mc_disasm_ptr->GetMCInst(opcode_data, opcode_data_len, pc, inst);
+        size_t inst_size = 0;
+        bool valid = mc_disasm_ptr->GetMCInst(opcode_data, opcode_data_len, pc,
+                                              inst, inst_size);
 
-        if (inst_size > 0) {
+        if (valid && inst_size > 0) {
           mc_disasm_ptr->SetStyle(use_hex_immediates, hex_style);
 
           const bool saved_use_color = mc_disasm_ptr->GetUseColor();
@@ -1206,9 +1217,10 @@ class InstructionLLVMC : public lldb_private::Instruction {
     const uint8_t *opcode_data = data.GetDataStart();
     const size_t opcode_data_len = data.GetByteSize();
     llvm::MCInst inst;
-    const size_t inst_size =
-        mc_disasm_ptr->GetMCInst(opcode_data, opcode_data_len, pc, inst);
-    if (inst_size == 0)
+    size_t inst_size = 0;
+    const bool valid = mc_disasm_ptr->GetMCInst(opcode_data, opcode_data_len,
+                                                pc, inst, inst_size);
+    if (!valid)
       return;
 
     m_has_visited_instruction = true;
@@ -1337,19 +1349,18 @@ DisassemblerLLVMC::MCDisasmInstance::MCDisasmInstance(
          m_asm_info_up && m_context_up && m_disasm_up && m_instr_printer_up);
 }
 
-uint64_t DisassemblerLLVMC::MCDisasmInstance::GetMCInst(
-    const uint8_t *opcode_data, size_t opcode_data_len, lldb::addr_t pc,
-    llvm::MCInst &mc_inst) const {
+bool DisassemblerLLVMC::MCDisasmInstance::GetMCInst(const uint8_t *opcode_data,                                                     size_t opcode_data_len,
+                                                    lldb::addr_t pc,
+                                                    llvm::MCInst &mc_inst,
+                                                    size_t &size) const {
   llvm::ArrayRef<uint8_t> data(opcode_data, opcode_data_len);
   llvm::MCDisassembler::DecodeStatus status;
 
-  uint64_t new_inst_size;
-  status = m_disasm_up->getInstruction(mc_inst, new_inst_size, data, pc,
-                                       llvm::nulls());
+  status = m_disasm_up->getInstruction(mc_inst, size, data, pc, llvm::nulls());
   if (status == llvm::MCDisassembler::Success)
-    return new_inst_size;
+    return true;
   else
-    return 0;
+    return false;
 }
 
 void DisassemblerLLVMC::MCDisasmInstance::PrintMCInst(

@@ -228,9 +228,9 @@ static const CoreDefinition g_core_definitions[] = {
     {eByteOrderLittle, 4, 4, 4, llvm::Triple::hexagon,
      ArchSpec::eCore_hexagon_hexagonv5, "hexagonv5"},
 
-    {eByteOrderLittle, 4, 2, 4, llvm::Triple::riscv32, ArchSpec::eCore_riscv32,
+    {eByteOrderLittle, 4, 2, 8, llvm::Triple::riscv32, ArchSpec::eCore_riscv32,
      "riscv32"},
-    {eByteOrderLittle, 8, 2, 4, llvm::Triple::riscv64, ArchSpec::eCore_riscv64,
+    {eByteOrderLittle, 8, 2, 8, llvm::Triple::riscv64, ArchSpec::eCore_riscv64,
      "riscv64"},
 
     {eByteOrderLittle, 4, 4, 4, llvm::Triple::loongarch32,

@@ -0,0 +1,8 @@
+#! /usr/bin/env python3
+
+import sys
+
+for line in sys.stdin:
+    if "0940003f 00200020" in line and "<unknown>" in line:
+        line = line.replace("<unknown>", "Fake64")
+    print(line, end="")