Skip to content

Commit 292ca09

Browse files
feat: implement placeholder for bytecode
1 parent 590bc9f commit 292ca09

File tree

5 files changed

+370
-9
lines changed

5 files changed

+370
-9
lines changed

packages/testing/src/execution_testing/__init__.py

Lines changed: 2 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -107,6 +107,7 @@
107107
Opcode,
108108
OpcodeCallArg,
109109
Opcodes,
110+
Placeholder,
110111
call_return_code,
111112
)
112113

@@ -170,6 +171,7 @@
170171
"OpcodeCallArg",
171172
"Opcodes",
172173
"ParameterSet",
174+
"Placeholder",
173175
"ReferenceSpec",
174176
"ReferenceSpecTypes",
175177
"Removable",

packages/testing/src/execution_testing/vm/__init__.py

Lines changed: 2 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -5,7 +5,7 @@
55
OpcodeBase,
66
OpcodeGasCalculator,
77
)
8-
from .bytecode import Bytecode
8+
from .bytecode import Bytecode, Placeholder
99
from .helpers import MemoryVariable, call_return_code
1010
from .opcodes import (
1111
Macro,
@@ -30,5 +30,6 @@
3030
"OpcodeCallArg",
3131
"OpcodeGasCalculator",
3232
"Opcodes",
33+
"Placeholder",
3334
"call_return_code",
3435
)

packages/testing/src/execution_testing/vm/bytecode.py

Lines changed: 108 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -1,6 +1,6 @@
11
"""Ethereum Virtual Machine bytecode primitives and utilities."""
22

3-
from typing import Any, List, Self, SupportsBytes, Type
3+
from typing import Any, Dict, List, Self, SupportsBytes, Type
44

55
from pydantic import GetCoreSchemaHandler
66
from pydantic_core.core_schema import (
@@ -14,6 +14,49 @@
1414
from .bases import ForkOpcodeInterface, OpcodeBase
1515

1616

17+
class Placeholder:
    """
    Placeholder for a value to be filled in later.

    Used to break circular dependencies where a value (like gas cost)
    needs to be embedded in bytecode, but the bytecode structure is
    needed to calculate that value.

    Since PUSH1-PUSH32 all have the same gas cost (G_VERY_LOW = 3),
    the placeholder can be used in gas calculations before being filled.

    Instances do not define ``__eq__``/``__hash__``, so they compare and
    hash by identity; two placeholders with the same width are still
    distinct dictionary keys.

    Example (illustrative; ``Op`` and ``fork`` come from the caller)::

        loop_cost = Placeholder(width=2)
        code = Op.JUMPI(Op.GT(Op.GAS, loop_cost), 0)
        actual_cost = code.gas_cost(fork)
        final_code = code.fill(loop_cost, actual_cost)

    """

    # Class-wide running counter; only used to give each instance a
    # distinct ``_id`` for its repr.
    _counter: int = 0

    def __init__(self, width: int = 2):
        """
        Create a placeholder with a specific byte width.

        Args:
            width: Number of bytes (1-32). Determines max value:
                - width=1: max 255 (PUSH1)
                - width=2: max 65535 (PUSH2)
                - width=3: max 16777215 (PUSH3)

        Raises:
            TypeError: If width cannot be interpreted as an integer.
            ValueError: If width is outside the range 1-32.

        """
        import operator

        # Reject floats/strings up front: a value like 2.5 would pass the
        # range check below and only blow up later, when the width is used
        # as a byte count or shift amount.
        width = operator.index(width)
        if not 1 <= width <= 32:
            raise ValueError("width must be between 1 and 32")
        self.width = width
        self._id = Placeholder._counter
        Placeholder._counter += 1

    def __repr__(self) -> str:
        """Return string representation of the placeholder."""
        return f"Placeholder(width={self.width}, id={self._id})"
58+
59+
1760
class Bytecode:
1861
"""
1962
Base class to represent EVM bytecode.
@@ -41,6 +84,7 @@ class Bytecode:
4184

4285
terminating: bool
4386
opcode_list: List[OpcodeBase]
87+
_placeholders: Dict[Placeholder, int]
4488

4589
def __new__(
4690
cls,
@@ -67,6 +111,7 @@ def __new__(
67111
instance.terminating = False
68112
instance._name_ = name
69113
instance.opcode_list = opcode_list
114+
instance._placeholders = {}
70115
return instance
71116

72117
if isinstance(bytes_or_byte_code_base, Bytecode):
@@ -81,6 +126,7 @@ def __new__(
81126
obj.terminating = bytes_or_byte_code_base.terminating
82127
obj.opcode_list = bytes_or_byte_code_base.opcode_list[:]
83128
obj._name_ = bytes_or_byte_code_base._name_
129+
obj._placeholders = bytes_or_byte_code_base._placeholders.copy()
84130
return obj
85131

86132
if isinstance(bytes_or_byte_code_base, bytes):
@@ -103,6 +149,7 @@ def __new__(
103149
obj.terminating = terminating
104150
obj.opcode_list = opcode_list
105151
obj._name_ = name
152+
obj._placeholders = {}
106153
return obj
107154

108155
raise TypeError(
@@ -217,7 +264,7 @@ def __add__(self, other: "Bytecode | bytes | int | None") -> "Bytecode":
217264
c_min + a_max - a_min, c_min - a_pop + a_push + b_max - b_min
218265
)
219266

220-
return Bytecode(
267+
c = Bytecode(
221268
bytes(self) + bytes(other),
222269
popped_stack_items=c_pop,
223270
pushed_stack_items=c_push,
@@ -226,6 +273,11 @@ def __add__(self, other: "Bytecode | bytes | int | None") -> "Bytecode":
226273
terminating=other.terminating,
227274
opcode_list=self.opcode_list + other.opcode_list,
228275
)
276+
# Merge placeholders, adjusting offsets for 'other'
277+
c._placeholders = self._placeholders.copy()
278+
for placeholder, offset in other._placeholders.items():
279+
c._placeholders[placeholder] = offset + len(self)
280+
return c
229281

230282
def __radd__(self, other: "Bytecode | int | None") -> "Bytecode":
231283
"""
@@ -295,6 +347,60 @@ def refund(
295347
total_refund += opcode_refund_calculator(opcode)
296348
return total_refund
297349

350+
def fill(self, placeholder: Placeholder, value: int) -> "Bytecode":
    """
    Return a copy of this bytecode with a placeholder's bytes set.

    The placeholder's recorded offset is looked up, ``value`` is encoded
    big-endian into exactly ``placeholder.width`` bytes, and those bytes
    overwrite the same-sized span at that offset. This bytecode object
    itself is not modified.

    Args:
        placeholder: The placeholder to fill
        value: The value to insert (must fit in placeholder's width)

    Returns:
        New Bytecode with the placeholder replaced; it keeps the stack
        metadata, terminating flag and a copy of the opcode list, and
        tracks every placeholder except the one just filled

    Raises:
        ValueError: If value doesn't fit in placeholder's width
        KeyError: If placeholder not found in this bytecode

    """
    tracked = self._placeholders
    if placeholder not in tracked:
        raise KeyError(f"Placeholder {placeholder} not found in bytecode")

    width = placeholder.width
    limit = (1 << (width * 8)) - 1
    if value < 0 or value > limit:
        raise ValueError(
            f"Value {value} doesn't fit in {width} bytes "
            f"(max {limit})"
        )

    # Splice the encoded value over the span the placeholder occupies.
    start = tracked[placeholder]
    encoded = value.to_bytes(width, "big")
    head = self._bytes_[:start]
    tail = self._bytes_[start + width :]

    filled = Bytecode(
        head + encoded + tail,
        popped_stack_items=self.popped_stack_items,
        pushed_stack_items=self.pushed_stack_items,
        max_stack_height=self.max_stack_height,
        min_stack_height=self.min_stack_height,
        terminating=self.terminating,
        opcode_list=self.opcode_list[:],
    )

    # Offsets of the other placeholders are unchanged because the
    # replacement has the same length as the span it overwrites.
    filled._placeholders = {
        key: position
        for key, position in tracked.items()
        if key is not placeholder
    }
    return filled
403+
298404
@classmethod
299405
def __get_pydantic_core_schema__(
300406
cls, source_type: Any, handler: GetCoreSchemaHandler

packages/testing/src/execution_testing/vm/opcodes.py

Lines changed: 27 additions & 6 deletions
Original file line numberDiff line numberDiff line change
@@ -24,7 +24,7 @@
2424
from execution_testing.base_types import to_bytes
2525

2626
from .bases import OpcodeBase
27-
from .bytecode import Bytecode
27+
from .bytecode import Bytecode, Placeholder
2828

2929

3030
def _get_int_size(n: int) -> int:
@@ -45,13 +45,29 @@ def _get_int_size(n: int) -> int:
4545

4646
def _stack_argument_to_bytecode(
4747
arg: (
48-
"int | bytes | SupportsBytes | str | Opcode | Bytecode | Iterable[int]"
48+
"int | bytes | SupportsBytes | str | Opcode"
49+
" | Bytecode | Iterable[int] | Placeholder"
4950
),
5051
) -> Bytecode:
5152
"""Convert stack argument in an opcode or macro to bytecode."""
5253
if isinstance(arg, Bytecode):
5354
return arg
5455

56+
# Handle Placeholder - create appropriate PUSH with zeros
57+
if isinstance(arg, Placeholder):
58+
# PUSH1 is 0x60, PUSH2 is 0x61, ..., PUSH32 is 0x7f
59+
push_opcode = 0x5F + arg.width
60+
bytecode_bytes = bytes([push_opcode]) + bytes(arg.width)
61+
bytecode = Bytecode(
62+
bytecode_bytes,
63+
popped_stack_items=0,
64+
pushed_stack_items=1,
65+
min_stack_height=0,
66+
max_stack_height=1,
67+
)
68+
bytecode._placeholders = {arg: 1}
69+
return bytecode
70+
5571
# We are going to push a constant to the stack.
5672
data_size = 0
5773
if isinstance(arg, int):
@@ -329,7 +345,10 @@ def with_metadata(self, **metadata: Any) -> "Opcode":
329345

330346
def __call__(
331347
self,
332-
*args_t: "int | bytes | str | Opcode | Bytecode | Iterable[int]",
348+
*args_t: (
349+
"int | bytes | str | Opcode"
350+
" | Bytecode | Iterable[int] | Placeholder"
351+
),
333352
unchecked: bool = False,
334353
**kwargs: "int | bytes | str | Opcode | Bytecode",
335354
) -> "Bytecode | Opcode":
@@ -362,9 +381,10 @@ def __call__(
362381
363382
Hex-strings will be automatically converted to bytes.
364383
"""
365-
args: List["int | bytes | str | Opcode | Bytecode | Iterable[int]"] = (
366-
list(args_t)
367-
)
384+
args: List[
385+
"int | bytes | str | Opcode"
386+
" | Bytecode | Iterable[int] | Placeholder"
387+
] = list(args_t)
368388
opcode = self
369389

370390
# handle metadata first
@@ -386,6 +406,7 @@ def __call__(
386406
assert type(opcode) is Opcode
387407
get_item_arg = args.pop()
388408
assert not isinstance(get_item_arg, Bytecode)
409+
assert not isinstance(get_item_arg, Placeholder)
389410
return opcode[get_item_arg](*args)
390411

391412
if opcode.kwargs is not None and len(kwargs) > 0:

0 commit comments

Comments
 (0)