Skip to content

Commit f40bfca

Browse files
authored
refactor: add InstIter for iterating bytecode, pc, ic (#11956)
1 parent 93e6755 commit f40bfca

File tree

6 files changed

+311
-210
lines changed

6 files changed

+311
-210
lines changed

crates/cast/src/lib.rs

Lines changed: 10 additions & 26 deletions
Original file line numberDiff line numberDiff line change
@@ -35,7 +35,7 @@ use foundry_common::{
3535
fs, get_pretty_tx_receipt_attr, shell,
3636
};
3737
use foundry_config::Chain;
38-
use foundry_evm::core::ic::decode_instructions;
38+
use foundry_evm::core::bytecode::InstIter;
3939
use futures::{FutureExt, StreamExt, future::Either};
4040
use op_alloy_consensus::OpTxEnvelope;
4141
use rayon::prelude::*;
@@ -2222,23 +2222,9 @@ impl SimpleCast {
22222222
/// ```
22232223
pub fn disassemble(code: &[u8]) -> Result<String> {
22242224
let mut output = String::new();
2225-
2226-
for step in decode_instructions(code)? {
2227-
write!(output, "{:08x}: ", step.pc)?;
2228-
2229-
if let Some(op) = step.op {
2230-
write!(output, "{op}")?;
2231-
} else {
2232-
write!(output, "INVALID")?;
2233-
}
2234-
2235-
if !step.immediate.is_empty() {
2236-
write!(output, " {}", hex::encode_prefixed(step.immediate))?;
2237-
}
2238-
2239-
writeln!(output)?;
2225+
for (pc, inst) in InstIter::new(code).with_pc() {
2226+
writeln!(output, "{pc:08x}: {inst}")?;
22402227
}
2241-
22422228
Ok(output)
22432229
}
22442230

@@ -2567,23 +2553,21 @@ mod tests {
25672553
fn disassemble_incomplete_sequence() {
25682554
let incomplete = &hex!("60"); // PUSH1
25692555
let disassembled = Cast::disassemble(incomplete).unwrap();
2570-
assert_eq!(disassembled, "00000000: PUSH1 0x00\n");
2556+
assert_eq!(disassembled, "00000000: PUSH1\n");
25712557

25722558
let complete = &hex!("6000"); // PUSH1 0x00
2573-
let disassembled = Cast::disassemble(complete);
2574-
assert!(disassembled.is_ok());
2559+
let disassembled = Cast::disassemble(complete).unwrap();
2560+
assert_eq!(disassembled, "00000000: PUSH1 0x00\n");
25752561

25762562
let incomplete = &hex!("7fffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffff"); // PUSH32 with 31 bytes
2577-
25782563
let disassembled = Cast::disassemble(incomplete).unwrap();
2564+
assert_eq!(disassembled, "00000000: PUSH32\n");
25792565

2566+
let complete = &hex!("7fffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffff"); // PUSH32 with 32 bytes
2567+
let disassembled = Cast::disassemble(complete).unwrap();
25802568
assert_eq!(
25812569
disassembled,
2582-
"00000000: PUSH32 0xffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffff00\n"
2570+
"00000000: PUSH32 0xffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffff\n"
25832571
);
2584-
2585-
let complete = &hex!("7fffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffff"); // PUSH32 with 32 bytes
2586-
let disassembled = Cast::disassemble(complete);
2587-
assert!(disassembled.is_ok());
25882572
}
25892573
}

crates/evm/core/src/bytecode.rs

Lines changed: 247 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,247 @@
1+
use revm::bytecode::{OpCode, opcode};
2+
use std::{fmt, slice};
3+
4+
/// An iterator over bytecode that yields each opcode together with its immediate data.
5+
///
6+
/// If the bytecode is not well-formed, the iterator will still yield opcodes, but the immediate
7+
/// data may be missing: a `PUSHn` followed by fewer than `n` bytes is yielded with an empty
/// immediate, and the leftover bytes are discarded so the iteration ends right after it.
/// For example, if the bytecode is `PUSH2 0x69`, the iterator will yield
8+
/// `PUSH2, &[]` and then `None`.
9+
#[derive(Clone, Debug)]
10+
pub struct InstIter<'a> {
11+
// The bytes that have not been decoded yet.
iter: slice::Iter<'a, u8>,
12+
}
13+
14+
/// Formats every instruction that is still left in the iterator, separated by single spaces.
///
/// The iterator itself is not advanced: formatting walks a clone of it.
impl fmt::Display for InstIter<'_> {
15+
fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result {
16+
for (i, op) in self.clone().enumerate() {
17+
// Separator goes between instructions only, so there is no leading or trailing space.
if i > 0 {
18+
f.write_str(" ")?;
19+
}
20+
write!(f, "{op}")?;
21+
}
22+
Ok(())
23+
}
24+
}
25+
26+
impl<'a> InstIter<'a> {
27+
/// Create a new iterator over the given bytecode slice.
28+
#[inline]
29+
pub fn new(slice: &'a [u8]) -> Self {
30+
Self { iter: slice.iter() }
31+
}
32+
33+
/// Returns a new iterator that also yields the program counter alongside the opcode and
34+
/// immediate data.
///
/// The program counter starts at `0` for the first instruction the returned iterator yields.
35+
#[inline]
36+
pub fn with_pc(self) -> InstIterWithPc<'a> {
37+
InstIterWithPc { iter: self, pc: 0 }
38+
}
39+
40+
/// Returns a shared reference to the inner byte iterator over the not yet decoded bytes.
41+
#[inline]
42+
pub fn inner(&self) -> &slice::Iter<'a, u8> {
43+
&self.iter
44+
}
45+
46+
/// Returns a mutable reference to the inner byte iterator over the not yet decoded bytes.
47+
#[inline]
48+
pub fn inner_mut(&mut self) -> &mut slice::Iter<'a, u8> {
49+
&mut self.iter
50+
}
51+
52+
/// Consumes `self` and returns the inner byte iterator over the not yet decoded bytes.
53+
#[inline]
54+
pub fn into_inner(self) -> slice::Iter<'a, u8> {
55+
self.iter
56+
}
57+
}
58+
59+
impl<'a> Iterator for InstIter<'a> {
60+
type Item = Inst<'a>;
61+
62+
/// Decodes the next opcode byte and splits off the immediate bytes that belong to it.
#[inline]
63+
fn next(&mut self) -> Option<Self::Item> {
64+
self.iter.next().map(|&opcode| {
65+
// NOTE(review): the byte is not validated before this call; presumably any `u8` is
// acceptable to `OpCode::new_unchecked` - confirm against its safety contract.
let opcode = unsafe { OpCode::new_unchecked(opcode) };
66+
let len = imm_len(opcode.get()) as usize;
67+
// If fewer than `len` bytes are left, `split_at_checked` returns `None` and the default
// `(&[], &[])` is used: the immediate is empty and the leftover bytes are dropped.
let (immediate, rest) = self.iter.as_slice().split_at_checked(len).unwrap_or_default();
68+
self.iter = rest.iter();
69+
Inst { opcode, immediate }
70+
})
71+
}
72+
73+
/// Lower bound: one instruction if any byte is left, otherwise zero.
/// Upper bound: the number of remaining bytes (each byte being its own instruction).
#[inline]
74+
fn size_hint(&self) -> (usize, Option<usize>) {
75+
let len = self.iter.len();
76+
((len != 0) as usize, Some(len))
77+
}
78+
}
79+
80+
// `next` only forwards to the inner slice iterator, so once the bytes run out it keeps
// returning `None`.
impl std::iter::FusedIterator for InstIter<'_> {}
81+
82+
/// A bytecode iterator that yields opcodes and their immediate data, alongside the program counter.
83+
///
84+
/// Created by calling [`InstIter::with_pc`].
85+
#[derive(Debug)]
86+
pub struct InstIterWithPc<'a> {
87+
// The wrapped instruction iterator.
iter: InstIter<'a>,
88+
// Program counter of the next instruction to be yielded.
pc: usize,
89+
}
90+
91+
impl<'a> Iterator for InstIterWithPc<'a> {
92+
type Item = (usize, Inst<'a>);
93+
94+
/// Yields the next instruction paired with the program counter of its opcode byte.
#[inline]
95+
fn next(&mut self) -> Option<Self::Item> {
96+
self.iter.next().map(|inst| {
97+
let pc = self.pc;
98+
// Advance past the opcode byte itself plus the immediate bytes that were yielded.
self.pc += 1 + inst.immediate.len();
99+
(pc, inst)
100+
})
101+
}
102+
103+
/// Same bounds as the wrapped [`InstIter`]; pairing with the program counter adds no items.
#[inline]
104+
fn size_hint(&self) -> (usize, Option<usize>) {
105+
self.iter.size_hint()
106+
}
107+
}
108+
109+
// `next` only forwards to the wrapped `InstIter`, which is itself declared fused.
impl std::iter::FusedIterator for InstIterWithPc<'_> {}
110+
111+
/// An opcode and its immediate data. Returned by [`InstIter`].
112+
#[derive(Clone, Copy, PartialEq, Eq)]
113+
pub struct Inst<'a> {
114+
/// The opcode.
115+
pub opcode: OpCode,
116+
/// The immediate data, if any.
117+
///
118+
/// If an opcode is missing its immediate data, e.g. in malformed bytecode or in a bytecode
119+
/// hash, this will be an empty slice.
120+
pub immediate: &'a [u8],
121+
}
122+
123+
/// `Debug` output is identical to the `Display` output.
impl fmt::Debug for Inst<'_> {
124+
fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result {
125+
fmt::Display::fmt(self, f)
126+
}
127+
}
128+
129+
/// Writes the opcode, followed by a space and the `0x`-prefixed hex of the immediate data when
/// the immediate is not empty (e.g. `PUSH1 0x69`).
impl fmt::Display for Inst<'_> {
130+
fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result {
131+
write!(f, "{}", self.opcode)?;
132+
match self.immediate {
133+
// No immediate: print the opcode only, without a trailing space.
[] => Ok(()),
134+
imm => write!(f, " {:#x}", alloy_primitives::hex::display(imm)),
135+
}
136+
}
137+
}
138+
139+
/// Returns the length in bytes of the immediate data for the given opcode, or `0` if none.
///
/// Only `PUSH1..=PUSH32` carry immediate data here; every other opcode, including `PUSH0`,
/// yields `0`.
140+
#[inline]
141+
const fn imm_len(op: u8) -> u8 {
142+
match op {
143+
// The distance from `PUSH0` is the `n` of `PUSHn`, i.e. the number of immediate bytes.
opcode::PUSH1..=opcode::PUSH32 => op - opcode::PUSH0,
144+
_ => 0,
145+
}
146+
}
147+
148+
/// Returns a string representation of the given bytecode.
///
/// The instructions are separated by single spaces, e.g. `PUSH0 PUSH1 0x69 PUSH2 0x0102`.
149+
pub fn format_bytecode(bytecode: &[u8]) -> String {
150+
let mut w = String::new();
151+
// Panics if formatting reports an error; writing into a `String` is not expected to fail.
format_bytecode_to(bytecode, &mut w).unwrap();
152+
w
153+
}
154+
155+
/// Formats an EVM bytecode to the given writer.
///
/// Uses the `Display` implementation of [`InstIter`]; any error from the writer is returned.
156+
pub fn format_bytecode_to<W: fmt::Write + ?Sized>(bytecode: &[u8], w: &mut W) -> fmt::Result {
157+
write!(w, "{}", InstIter::new(bytecode))
158+
}
159+
160+
#[cfg(test)]
160+
mod tests {
161+
use super::*;
162+
use revm::bytecode::opcode as op;
163+
164+
// Test helper: wraps a raw byte in an `OpCode` without validating it.
fn o(op: u8) -> OpCode {
165+
unsafe { OpCode::new_unchecked(op) }
166+
}
167+
168+
// Opcodes without immediate data are yielded one per byte.
#[test]
169+
fn iter_basic() {
170+
let bytecode = [0x01, 0x02, 0x03, 0x04, 0x05];
171+
let mut iter = InstIter::new(&bytecode);
172+
173+
assert_eq!(iter.next(), Some(Inst { opcode: o(0x01), immediate: &[] }));
174+
assert_eq!(iter.next(), Some(Inst { opcode: o(0x02), immediate: &[] }));
175+
assert_eq!(iter.next(), Some(Inst { opcode: o(0x03), immediate: &[] }));
176+
assert_eq!(iter.next(), Some(Inst { opcode: o(0x04), immediate: &[] }));
177+
assert_eq!(iter.next(), Some(Inst { opcode: o(0x05), immediate: &[] }));
178+
assert_eq!(iter.next(), None);
179+
}
180+
181+
// `PUSH0` has no immediate; `PUSH1` and `PUSH2` take one and two bytes respectively.
#[test]
182+
fn iter_with_imm() {
183+
let bytecode = [op::PUSH0, op::PUSH1, 0x69, op::PUSH2, 0x01, 0x02];
184+
let mut iter = InstIter::new(&bytecode);
185+
186+
assert_eq!(iter.next(), Some(Inst { opcode: o(op::PUSH0), immediate: &[] }));
187+
assert_eq!(iter.next(), Some(Inst { opcode: o(op::PUSH1), immediate: &[0x69] }));
188+
assert_eq!(iter.next(), Some(Inst { opcode: o(op::PUSH2), immediate: &[0x01, 0x02] }));
189+
assert_eq!(iter.next(), None);
190+
}
191+
192+
// A truncated `PUSH2` is yielded with an empty immediate and the lone trailing byte is dropped.
#[test]
193+
fn iter_with_imm_too_short() {
194+
let bytecode = [op::PUSH2, 0x69];
195+
let mut iter = InstIter::new(&bytecode);
196+
197+
assert_eq!(iter.next(), Some(Inst { opcode: o(op::PUSH2), immediate: &[] }));
198+
assert_eq!(iter.next(), None);
199+
}
200+
201+
// Instructions are space-separated and immediates are printed as `0x`-prefixed hex.
#[test]
202+
fn display() {
203+
let bytecode = [op::PUSH0, op::PUSH1, 0x69, op::PUSH2, 0x01, 0x02];
204+
let s = format_bytecode(&bytecode);
205+
assert_eq!(s, "PUSH0 PUSH1 0x69 PUSH2 0x0102");
206+
}
207+
208+
// The program counter skips over the immediate bytes of the preceding instruction.
#[test]
209+
fn decode_push2_and_stop() {
210+
// 0x61 0xAA 0xBB = PUSH2 0xAABB
211+
// 0x00 = STOP
212+
let code = vec![0x61, 0xAA, 0xBB, 0x00];
213+
let insns = InstIter::new(&code).with_pc().collect::<Vec<_>>();
214+
215+
// PUSH2 then STOP
216+
assert_eq!(insns.len(), 2);
217+
218+
// PUSH2 at pc = 0
219+
let i0 = &insns[0];
220+
assert_eq!(i0.0, 0);
221+
assert_eq!(i0.1.opcode, op::PUSH2);
222+
assert_eq!(i0.1.immediate, &[0xAA, 0xBB]);
223+
224+
// STOP at pc = 3
225+
let i1 = &insns[1];
226+
assert_eq!(i1.0, 3);
227+
assert_eq!(i1.1.opcode, op::STOP);
228+
assert!(i1.1.immediate.is_empty());
229+
}
230+
231+
// Without immediates the program counter advances by exactly one per instruction.
#[test]
232+
fn decode_arithmetic_ops() {
233+
// 0x01 = ADD, 0x02 = MUL, 0x03 = SUB, 0x04 = DIV
234+
let code = vec![0x01, 0x02, 0x03, 0x04];
235+
let insns = InstIter::new(&code).with_pc().collect::<Vec<_>>();
236+
237+
assert_eq!(insns.len(), 4);
238+
239+
let expected = [(0, op::ADD), (1, op::MUL), (2, op::SUB), (3, op::DIV)];
240+
for ((pc, want_op), insn) in expected.iter().zip(insns.iter()) {
241+
assert_eq!(insn.0, *pc);
242+
assert_eq!(insn.1.opcode, *want_op);
243+
assert!(insn.1.immediate.is_empty());
244+
}
245+
}
246+
}

0 commit comments

Comments
 (0)