diff --git a/Cargo.lock b/Cargo.lock index 8fdfe45..dde77c8 100644 --- a/Cargo.lock +++ b/Cargo.lock @@ -176,6 +176,7 @@ version = "0.0.0" dependencies = [ "bitflags", "hex", + "thiserror", ] [[package]] @@ -195,6 +196,26 @@ dependencies = [ "unicode-ident", ] +[[package]] +name = "thiserror" +version = "2.0.17" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "f63587ca0f12b72a0600bcba1d40081f830876000bb46dd2337a3051618f4fc8" +dependencies = [ + "thiserror-impl", +] + +[[package]] +name = "thiserror-impl" +version = "2.0.17" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "3ff15c8ecd7de3849db632e14d18d2571fa09dfc5ed93479bc4485c7a517c913" +dependencies = [ + "proc-macro2", + "quote", + "syn", +] + [[package]] name = "unicode-ident" version = "1.0.20" diff --git a/robustone-core/Cargo.toml b/robustone-core/Cargo.toml index dacb107..55b3204 100644 --- a/robustone-core/Cargo.toml +++ b/robustone-core/Cargo.toml @@ -6,6 +6,7 @@ edition = "2024" [dependencies] hex = "0.4" bitflags = "2.10.0" +thiserror = "2.0.17" [features] default = ["riscv"] diff --git a/robustone-core/src/architecture.rs b/robustone-core/src/architecture.rs index 72eed6f..fdbc22a 100644 --- a/robustone-core/src/architecture.rs +++ b/robustone-core/src/architecture.rs @@ -3,10 +3,80 @@ //! This module provides utility functions for working with different //! instruction set architectures in a consistent way. -/// Utility functions for working with architectures. -pub struct ArchitectureUtils; +use std::fmt; +use std::fmt::Display; + +#[derive(Debug, PartialEq)] +pub enum Architecture { + RiscV32, + RiscV64, + RiscV32E, + X86, + X86_64, + AArch64, + Arm, + Unknown, +} + +impl Display for Architecture { + fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result { + write!(f, "{}", self.as_str()) + } +} + +impl From<&str> for Architecture { + /// Performs the conversion from a string slice (`&str`) into `Architecture`. 
+ /// + /// This uses the same logic as [`Architecture::parse`]. + /// + /// # Examples + /// + /// ```rust + /// use robustone_core::prelude::Architecture; + /// let arch: Architecture = "AMD64".into(); + /// assert_eq!(arch, "x86_64"); + /// ``` + fn from(name: &str) -> Self { + Architecture::parse(name) + } +} + +impl From for Architecture { + /// Performs the conversion from an owned string (`String`) into `Architecture`. + /// + /// This uses the same logic as [`Architecture::parse`]. + fn from(name: String) -> Self { + Architecture::parse(&name) + } +} + +impl PartialEq<&str> for Architecture { + fn eq(&self, other: &&str) -> bool { + self.as_str() == *other + } +} + +impl PartialEq for Architecture { + fn eq(&self, other: &String) -> bool { + self.as_str() == other.as_str() + } +} + +impl Architecture { + /// Returns a static string slice representation of the architecture without allocating. + pub fn as_str(&self) -> &'static str { + match self { + Architecture::RiscV32 => "riscv32", + Architecture::RiscV64 => "riscv64", + Architecture::RiscV32E => "riscv32e", + Architecture::X86 => "x86", + Architecture::X86_64 => "x86_64", + Architecture::AArch64 => "aarch64", + Architecture::Arm => "arm", + Architecture::Unknown => "unknown", + } + } -impl ArchitectureUtils { /// Determines the architecture from a name string. /// /// This utility function attempts to match a provided architecture name @@ -18,93 +88,123 @@ impl ArchitectureUtils { /// /// # Returns /// - /// A normalized architecture name, or the original name if no pattern matches. + /// An Architecture enum representing the parsed architecture. + /// + /// Returns `Architecture::Unknown` if the input does not match a known architecture. 
/// /// # Examples /// /// ```rust /// use robustone_core::prelude::*; - /// assert_eq!(ArchitectureUtils::normalize_name("RISCV32"), "riscv32"); - /// assert_eq!(ArchitectureUtils::normalize_name("x86-64"), "x86_64"); - /// assert_eq!(ArchitectureUtils::normalize_name("armv7"), "arm"); + /// assert_eq!(Architecture::from("RISCV32"), "riscv32"); + /// assert_eq!(Architecture::from("x86-64"), "x86_64"); + /// assert_eq!(Architecture::from("armv7"), "arm"); + /// assert_eq!(Architecture::from("RISCV32"), Architecture::RiscV32); + /// assert_eq!(Architecture::from("x86-64"), Architecture::X86_64); + /// assert_eq!(Architecture::from("ARMV7"), Architecture::Arm); /// ``` - pub fn normalize_name(name: &str) -> String { - let normalized = name.to_lowercase(); + fn parse(name: impl AsRef) -> Self { + let normalized = name.as_ref().to_lowercase(); match normalized.as_str() { // RISC-V variants n if n.starts_with("riscv") => { if n.contains("e") { - "riscv32e".to_string() + Architecture::RiscV32E } else if n.contains("32") { - "riscv32".to_string() + Architecture::RiscV32 } else if n.contains("64") { - "riscv64".to_string() + Architecture::RiscV64 } else { - "riscv".to_string() + Architecture::RiscV32 } } // x86 variants n if n.starts_with("x86") || n.starts_with("i386") || n.starts_with("amd64") => { if n.contains("64") { - "x86_64".to_string() + Architecture::X86_64 } else { - "x86".to_string() + Architecture::X86 } } // ARM variants + n if n.starts_with("aarch64") => Architecture::AArch64, n if n.starts_with("arm") => { - if n.contains("64") || n.starts_with("aarch64") { - "aarch64".to_string() + if n.contains("64") { + Architecture::AArch64 } else { - "arm".to_string() + Architecture::Arm } } // Return normalized version for unknown architectures - _ => normalized, + _ => Architecture::Unknown, } } +} - /// Validates that a byte sequence is properly aligned for an architecture. 
- /// - /// # Arguments - /// - /// * `address` - The memory address to check - /// * `alignment` - The required alignment in bytes - /// - /// # Returns - /// - /// `true` if the address is properly aligned, `false` otherwise. - pub fn is_address_aligned(address: u64, alignment: usize) -> bool { - address.is_multiple_of(alignment as u64) - } +/// Validates that a byte sequence is properly aligned for an architecture. +/// +/// # Arguments +/// +/// * `address` - The memory address to check +/// * `alignment` - The required alignment in bytes +/// +/// # Returns +/// +/// `true` if the address is properly aligned, `false` otherwise. +pub fn is_address_aligned(address: u64, alignment: usize) -> bool { + address.is_multiple_of(alignment as u64) } #[cfg(test)] mod tests { use super::*; + #[test] + fn test_architecture_utils_determine_architecture() { + assert_eq!(Architecture::from("RISCV32"), Architecture::RiscV32); + assert_eq!(Architecture::from("riscv64"), Architecture::RiscV64); + assert_eq!(Architecture::from("RISCV32E"), Architecture::RiscV32E); + assert_eq!(Architecture::from("x86-64"), Architecture::X86_64); + assert_eq!(Architecture::from("AMD64"), Architecture::X86_64); + assert_eq!(Architecture::from("aarch64"), Architecture::AArch64); + assert_eq!(Architecture::from("ARMV7"), Architecture::Arm); + assert_eq!(Architecture::from("unknown"), Architecture::Unknown); + } + + #[test] + fn test_architecture_utils_determine_architecture_variants() { + assert_eq!(Architecture::from("riscv32"), Architecture::RiscV32); + assert_eq!(Architecture::from("riscv64"), Architecture::RiscV64); + assert_eq!(Architecture::from("riscv32e"), Architecture::RiscV32E); + assert_eq!(Architecture::from("x86"), Architecture::X86); + assert_eq!(Architecture::from("x86_64"), Architecture::X86_64); + assert_eq!(Architecture::from("aarch64"), Architecture::AArch64); + assert_eq!(Architecture::from("arm"), Architecture::Arm); + assert_eq!(Architecture::from("mips"), 
Architecture::Unknown); + } + #[test] fn test_architecture_utils_normalize_name() { - assert_eq!(ArchitectureUtils::normalize_name("RISCV32"), "riscv32"); - assert_eq!(ArchitectureUtils::normalize_name("riscv64"), "riscv64"); - assert_eq!(ArchitectureUtils::normalize_name("RISCV32E"), "riscv32e"); - assert_eq!(ArchitectureUtils::normalize_name("x86-64"), "x86_64"); - assert_eq!(ArchitectureUtils::normalize_name("AMD64"), "x86_64"); - assert_eq!(ArchitectureUtils::normalize_name("aarch64"), "aarch64"); - assert_eq!(ArchitectureUtils::normalize_name("ARMV7"), "arm"); - assert_eq!(ArchitectureUtils::normalize_name("unknown"), "unknown"); + assert_eq!(Architecture::from("RISCV32"), "riscv32"); + assert_eq!(Architecture::from("riscv64"), "riscv64"); + assert_eq!(Architecture::from("RISCV32E"), "riscv32e"); + assert_eq!(Architecture::from("x86-64"), "x86_64"); + assert_eq!(Architecture::from("AMD64"), "x86_64"); + assert_eq!(Architecture::from("aarch64"), "aarch64"); + assert_eq!(Architecture::from("ARMV7"), "arm"); + assert_eq!(Architecture::from("unknown"), "unknown"); } #[test] fn test_architecture_utils_alignment() { - assert!(ArchitectureUtils::is_address_aligned(0x1000, 4)); - assert!(ArchitectureUtils::is_address_aligned(0x1004, 4)); - assert!(!ArchitectureUtils::is_address_aligned(0x1002, 4)); - assert!(ArchitectureUtils::is_address_aligned(0x1000, 8)); - assert!(!ArchitectureUtils::is_address_aligned(0x1004, 8)); + assert!(is_address_aligned(0x1000, 4)); + assert!(is_address_aligned(0x1004, 4)); + assert!(!is_address_aligned(0x1002, 4)); + assert!(is_address_aligned(0x1000, 8)); + assert!(!is_address_aligned(0x1004, 8)); } } diff --git a/robustone-core/src/error.rs b/robustone-core/src/error.rs index f7db7b3..5490170 100644 --- a/robustone-core/src/error.rs +++ b/robustone-core/src/error.rs @@ -1,29 +1,14 @@ +use thiserror::Error; + /// Errors produced by the architecture-agnostic disassembly layer. 
-#[derive(Debug)] +#[derive(Error, Debug)] pub enum DisasmError { + #[error("ERROR: Unsupported architecture: {0}")] UnsupportedArchitecture(String), + #[error("ERROR: Decoding failed: {0}")] DecodingError(String), + #[error("ERROR: invalid assembly code: {0}")] InvalidHexCode(String), + #[error("ERROR: invalid address argument: {0}")] InvalidAddress(String), } - -impl std::fmt::Display for DisasmError { - fn fmt(&self, f: &mut std::fmt::Formatter) -> std::fmt::Result { - match self { - DisasmError::UnsupportedArchitecture(arch) => { - write!(f, "ERROR: Unsupported architecture: {arch}") - } - DisasmError::DecodingError(msg) => { - write!(f, "ERROR: Decoding failed: {msg}") - } - DisasmError::InvalidHexCode(msg) => { - write!(f, "ERROR: invalid assembly code: {msg}") - } - DisasmError::InvalidAddress(msg) => { - write!(f, "ERROR: invalid address argument: {msg}") - } - } - } -} - -impl std::error::Error for DisasmError {} diff --git a/robustone-core/src/instruction.rs b/robustone-core/src/instruction.rs index 2c48a66..1654984 100644 --- a/robustone-core/src/instruction.rs +++ b/robustone-core/src/instruction.rs @@ -5,12 +5,43 @@ use std::collections::HashMap; +use crate::riscv::arch::RiscVInstructionDetail; + +#[derive(Debug, Clone)] +pub enum AllInstructionDetail { + BasicInstructionDetail(BasicInstructionDetail), + RiscVInstructionDetail(RiscVInstructionDetail), +} + +impl AsRef for AllInstructionDetail { + fn as_ref(&self) -> &(dyn InstructionDetail + 'static) { + match self { + AllInstructionDetail::BasicInstructionDetail(detail) => detail, + AllInstructionDetail::RiscVInstructionDetail(detail) => detail, + } + } +} + +impl InstructionDetail for AllInstructionDetail { + fn architecture_name(&self) -> &'static str { + self.as_ref().architecture_name() + } + + fn registers_read(&self) -> &[u32] { + self.as_ref().registers_read() + } + + fn registers_written(&self) -> &[u32] { + self.as_ref().registers_written() + } +} + /// Decoded instruction returned by the 
disassembler. /// /// This is the legacy instruction structure that maintains backward compatibility /// with existing code while providing the essential information needed for /// most disassembly use cases. -#[derive(Debug)] +#[derive(Debug, Clone)] pub struct Instruction { /// The memory address where this instruction would be located. /// @@ -52,24 +83,7 @@ pub struct Instruction { /// structured information beyond the basic text representation. /// The content is architecture-specific and should be handled /// with appropriate type checking or pattern matching. - pub detail: Option>, -} - -impl Clone for Instruction { - fn clone(&self) -> Self { - Self { - address: self.address, - bytes: self.bytes.clone(), - mnemonic: self.mnemonic.clone(), - operands: self.operands.clone(), - size: self.size, - detail: self.detail.as_ref().map(|_d| { - // For now, we'll use BasicInstructionDetail as a fallback for cloning - // This maintains compatibility while avoiding the complex clone trait - Box::new(BasicInstructionDetail::new("cloned")) as Box - }), - } - } + pub detail: Option, } impl Default for Instruction { @@ -146,7 +160,7 @@ impl Instruction { bytes: Vec, mnemonic: String, operands: String, - detail: Box, + detail: AllInstructionDetail, ) -> Self { let size = bytes.len(); Self { @@ -190,7 +204,7 @@ impl Instruction { mnemonic, operands, size, - detail: Some(Box::new(detail)), + detail: Some(AllInstructionDetail::BasicInstructionDetail(detail)), } } @@ -404,7 +418,7 @@ mod tests { vec![0x01, 0x02, 0x03, 0x04], "test".to_string(), "r1, r2".to_string(), - Box::new(detail), + AllInstructionDetail::BasicInstructionDetail(detail), ); assert_eq!(instruction.mnemonic, "test"); diff --git a/robustone-core/src/lib.rs b/robustone-core/src/lib.rs index c21b90d..8133d1c 100644 --- a/robustone-core/src/lib.rs +++ b/robustone-core/src/lib.rs @@ -54,15 +54,16 @@ pub mod utils; pub mod prelude { pub use crate::ArchitectureHandler; pub use crate::error::DisasmError; - pub use 
crate::instruction::{Instruction, InstructionDetail}; + pub use crate::instruction::{AllInstructionDetail, Instruction, InstructionDetail}; pub use crate::utils::{Endianness, HexParser}; // Re-export architecture utilities - pub use crate::architecture::ArchitectureUtils; + pub use crate::architecture::{Architecture, is_address_aligned}; } use crate::error::DisasmError; use crate::instruction::Instruction; +pub use crate::instruction::InstructionDetail; use crate::utils::HexParser; /// Trait that all architecture-specific disassemblers must implement. diff --git a/robustone-core/src/riscv/mod.rs b/robustone-core/src/riscv/mod.rs index 33ccdcd..14458fe 100644 --- a/robustone-core/src/riscv/mod.rs +++ b/robustone-core/src/riscv/mod.rs @@ -16,7 +16,11 @@ pub mod printer; pub mod shared; pub mod types; -use crate::{ArchitectureHandler, error::DisasmError, instruction::Instruction}; +use crate::{ + ArchitectureHandler, + error::DisasmError, + instruction::{AllInstructionDetail, Instruction}, +}; use arch::RiscVInstructionDetail; use decoder::{RiscVDecoder, Xlen}; use extensions::Extensions; @@ -94,7 +98,7 @@ impl ArchitectureHandler for RiscVHandler { bytes[..decoded.size].to_vec(), decoded.mnemonic, decoded.operands, - Box::new(riscv_detail), + AllInstructionDetail::RiscVInstructionDetail(riscv_detail), ), decoded.size, )) diff --git a/robustone-core/src/riscv/printer.rs b/robustone-core/src/riscv/printer.rs index f74dee2..798be26 100644 --- a/robustone-core/src/riscv/printer.rs +++ b/robustone-core/src/riscv/printer.rs @@ -3,7 +3,7 @@ //! Inspired by Capstone's printer to maintain compatible output formatting. use super::types::*; -use crate::Instruction; +use crate::{Instruction, InstructionDetail as _}; /// Pretty-printer for RISC-V instructions. 
pub struct RiscVPrinter { diff --git a/robustone-core/src/riscv/shared/encoding.rs b/robustone-core/src/riscv/shared/encoding.rs index 9e59e1c..8b2c560 100644 --- a/robustone-core/src/riscv/shared/encoding.rs +++ b/robustone-core/src/riscv/shared/encoding.rs @@ -475,6 +475,7 @@ mod tests { let decoder = DefaultSignExtender::new(); // Test R-type: add x1, x2, x3 + #[allow(clippy::unusual_byte_groupings)] let instruction = 0b0000000_00011_00010_000_00001_0110011; let r_fields = decoder.extract_r_type(instruction); assert_eq!(r_fields.opcode, 0b0110011); @@ -485,6 +486,7 @@ mod tests { assert_eq!(r_fields.funct7, 0b0000000); // Test I-type: addi x1, x2, 5 + #[allow(clippy::unusual_byte_groupings)] let instruction = 0b000000000101_00010_000_00001_0010011; let i_fields = decoder.extract_i_type(instruction); assert_eq!(i_fields.opcode, 0b0010011); @@ -494,6 +496,7 @@ mod tests { assert_eq!(i_fields.imm, 5); // Test U-type: lui x5, 0x10 + #[allow(clippy::unusual_byte_groupings)] let instruction = 0b0001_0000_0000_0000_0000_00101_0110111; let u_fields = decoder.extract_u_type(instruction); assert_eq!(u_fields.opcode, 0b0110111); @@ -523,6 +526,7 @@ mod tests { assert_eq!(convenience::sign_extend(0x800, 12), -2048); assert_eq!(convenience::sign_extend_16(0x8000, 16), -32768); + #[allow(clippy::unusual_byte_groupings)] let instruction = 0b0000000_00011_00010_000_00001_0110011; let fields = convenience::extract_fields(instruction); assert_eq!(fields.rd, 1);