Skip to content

Commit bff2b67

Browse files
committed
ssz encoding and bal validation
1 parent e491702 commit bff2b67

File tree

1 file changed

+277
-75
lines changed

1 file changed

+277
-75
lines changed

src/ethereum/osaka/bal_utils.py

Lines changed: 277 additions & 75 deletions
Original file line numberDiff line numberDiff line change
@@ -5,11 +5,26 @@
55
Utilities for working with Block Access Lists, including hashing and validation.
66
"""
77

8+
from typing import Union, Optional
89
from ethereum_types.bytes import Bytes
10+
from ethereum_types.numeric import Uint
911

1012
from ethereum.crypto.hash import Hash32, keccak256
1113

12-
from .ssz_types import BlockAccessList
14+
from .ssz_types import (
15+
BlockAccessList,
16+
AccountChanges,
17+
SlotChanges,
18+
SlotRead,
19+
StorageChange,
20+
BalanceChange,
21+
NonceChange,
22+
CodeChange,
23+
MAX_TXS,
24+
MAX_SLOTS,
25+
MAX_ACCOUNTS,
26+
MAX_CODE_SIZE,
27+
)
1328

1429

1530
def compute_bal_hash(bal: BlockAccessList) -> Hash32:
@@ -28,109 +43,296 @@ def compute_bal_hash(bal: BlockAccessList) -> Hash32:
2843
hash :
2944
The keccak256 hash of the SSZ-encoded BAL.
3045
"""
31-
# For now, use a simple implementation - in a full implementation,
32-
# this would use proper SSZ encoding
33-
bal_bytes = _encode_bal_to_bytes(bal)
46+
bal_bytes = ssz_encode_block_access_list(bal)
3447
return keccak256(bal_bytes)
3548

3649

37-
def _encode_bal_to_bytes(bal: BlockAccessList) -> Bytes:
38-
"""
39-
Encode a BlockAccessList to bytes for hashing.
40-
41-
This is a simplified implementation. In a production system,
42-
this would use proper SSZ encoding.
43-
"""
50+
def ssz_encode_uint(value: Union[int, Uint], size: int) -> bytes:
    """
    Serialize an unsigned integer into ``size`` little-endian bytes.

    A ``Uint`` is converted to a plain ``int`` before serialization; any
    other value is used as given. ``int.to_bytes`` raises ``OverflowError``
    when the value is negative or does not fit in ``size`` bytes.
    """
    plain = int(value) if isinstance(value, Uint) else value
    return plain.to_bytes(size, 'little')
55+
56+
57+
def ssz_encode_bytes(data: bytes) -> bytes:
    """
    Return ``data`` unchanged.

    Fixed-size byte strings need no framing, so the input already is its own
    encoding. The helper exists so call sites read uniformly.
    """
    encoded = data
    return encoded
60+
61+
62+
def ssz_encode_list(items: tuple, encode_item_fn, max_length: Optional[int] = None) -> bytes:
    """
    Encode a sequence of items, adding an offset table when sizes differ.

    Parameters
    ----------
    items :
        The elements to encode, in order.
    encode_item_fn :
        Callable returning the byte encoding of one element. It is invoked
        exactly once per element.
    max_length :
        ``None`` means the element encodings are simply concatenated. Any
        other value selects the bounded-list rules described under Returns.
        The bound itself is not enforced here.

    Returns
    -------
    encoded :
        With ``max_length`` set: the 4-byte little-endian value ``4`` for an
        empty sequence; the plain concatenation when every element encodes
        to the same length; otherwise a table of 4-byte little-endian
        offsets (measured from the start of the returned bytes) followed by
        the element encodings.
    """
    if max_length is not None and not items:
        # NOTE(review): the SSZ specification serializes an empty list as
        # zero bytes and leaves offsets to the enclosing container. The
        # 4-byte value is kept because existing callers embed it as-is;
        # confirm the intended wire format before changing it.
        return (4).to_bytes(4, 'little')

    # Encode each element once and reuse the bytes. Re-encoding for the size
    # probe and again for output multiplies the work at every nesting level.
    encoded_items = [encode_item_fn(item) for item in items]

    result = bytearray()

    if max_length is not None:
        # NOTE(review): whether offsets are needed is inferred from the
        # encoded lengths, not from the element type. A single
        # variable-size element, or several that happen to share a length,
        # is therefore written without an offset table.
        first_length = len(encoded_items[0])
        if any(len(encoded) != first_length for encoded in encoded_items):
            # The offset table comes first; data starts right after it.
            next_offset = 4 * len(encoded_items)
            for encoded in encoded_items:
                result.extend(next_offset.to_bytes(4, 'little'))
                next_offset += len(encoded)

    for encoded in encoded_items:
        result.extend(encoded)

    return bytes(result)
105+
106+
107+
def ssz_encode_storage_change(change: StorageChange) -> bytes:
    """
    Serialize a StorageChange as its tx index followed by the new value.

    The tx index is written as a 2-byte little-endian integer (uint16) and
    the new storage value (Bytes32) is appended verbatim.
    """
    index_part = ssz_encode_uint(change.tx_index, 2)
    value_part = ssz_encode_bytes(change.new_value)
    encoded = bytearray(index_part)
    encoded.extend(value_part)
    return bytes(encoded)
113+
114+
115+
def ssz_encode_balance_change(change: BalanceChange) -> bytes:
    """
    Serialize a BalanceChange as its tx index followed by the post balance.

    The tx index occupies 2 little-endian bytes (uint16) and the post
    balance 32 little-endian bytes (uint256).
    """
    index_part = ssz_encode_uint(change.tx_index, 2)
    balance_part = ssz_encode_uint(change.post_balance, 32)
    encoded = bytearray(index_part)
    encoded.extend(balance_part)
    return bytes(encoded)
121+
122+
123+
def ssz_encode_nonce_change(change: NonceChange) -> bytes:
    """
    Serialize a NonceChange as its tx index followed by the new nonce.

    The tx index occupies 2 little-endian bytes (uint16) and the nonce
    8 little-endian bytes (uint64).
    """
    index_part = ssz_encode_uint(change.tx_index, 2)
    nonce_part = ssz_encode_uint(change.new_nonce, 8)
    encoded = bytearray(index_part)
    encoded.extend(nonce_part)
    return bytes(encoded)
129+
130+
131+
def ssz_encode_code_change(change: CodeChange) -> bytes:
    """
    Encode a CodeChange as an SSZ container with one variable-size field.

    Layout: the fixed part holds the tx index as a 2-byte little-endian
    integer (uint16) followed by a 4-byte little-endian offset to the code;
    the variable part is the raw code bytes. SSZ does not length-prefix byte
    lists. The field's length is implied by the offset and the end of the
    container, so the offset always equals the size of the fixed part (6).
    """
    # 2 bytes of tx_index + 4 bytes for the offset itself.
    fixed_part_size = 2 + 4

    result = bytearray()
    result.extend(ssz_encode_uint(change.tx_index, 2))  # TxIndex as uint16
    # Offset of new_code from the start of this container, not its length.
    result.extend(ssz_encode_uint(fixed_part_size, 4))
    result.extend(change.new_code)
    return bytes(result)
141+
142+
143+
def ssz_encode_slot_changes(slot_changes: SlotChanges) -> bytes:
    """
    Encode SlotChanges as an SSZ container with one variable-size field.

    Layout: the fixed part holds the storage key (Bytes32) followed by a
    4-byte little-endian offset to the list of changes; the variable part is
    the encoded list. Without the offset a decoder cannot tell where the
    fixed part ends and the list begins.
    """
    slot_bytes = ssz_encode_bytes(slot_changes.slot)  # StorageKey as Bytes32

    # An empty list contributes no bytes to the variable part. It is handled
    # here so the result does not depend on what ssz_encode_list emits for
    # an empty sequence.
    if slot_changes.changes:
        changes_encoded = ssz_encode_list(
            slot_changes.changes,
            ssz_encode_storage_change,
            MAX_TXS  # max length for changes
        )
    else:
        changes_encoded = b""

    result = bytearray()
    result.extend(slot_bytes)
    # Offset of the list = size of the fixed part (key + the offset itself).
    result.extend(ssz_encode_uint(len(slot_bytes) + 4, 4))
    result.extend(changes_encoded)
    return bytes(result)
155+
156+
157+
def ssz_encode_slot_read(slot_read: SlotRead) -> bytes:
    """
    Serialize a SlotRead, which consists solely of its storage key.

    The key (Bytes32) is passed through ``ssz_encode_bytes`` unchanged.
    """
    storage_key = slot_read.slot
    return ssz_encode_bytes(storage_key)
160+
161+
162+
def ssz_encode_account_changes(account: AccountChanges) -> bytes:
    """
    Encode AccountChanges as an SSZ container.

    Layout: the fixed part holds the address (Bytes20) followed by five
    4-byte little-endian offsets, one per list field in the order
    storage_changes, storage_reads, balance_changes, nonce_changes,
    code_changes. The variable part holds the encoded lists in the same
    order. Offsets are measured from the start of the container.

    An empty list field contributes no bytes to the variable part, because
    its presence is already recorded by its offset in the fixed part.
    """
    address_bytes = ssz_encode_bytes(account.address)  # Address as Bytes20

    # (elements, per-element encoder, list limit) for each variable field.
    list_fields = (
        (account.storage_changes, ssz_encode_slot_changes, MAX_SLOTS),
        (account.storage_reads, ssz_encode_slot_read, MAX_SLOTS),
        (account.balance_changes, ssz_encode_balance_change, MAX_TXS),
        (account.nonce_changes, ssz_encode_nonce_change, MAX_TXS),
        (account.code_changes, ssz_encode_code_change, MAX_TXS),
    )

    # The first variable field starts right after the address and the
    # offset table.
    next_offset = len(address_bytes) + 4 * len(list_fields)

    offset_table = bytearray()
    data_section = bytearray()
    for items, encode_item, limit in list_fields:
        offset_table.extend(ssz_encode_uint(next_offset, 4))
        # Skip the helper for empty lists: it returns a 4-byte placeholder
        # that would otherwise land in the data section as stray bytes.
        if items:
            encoded = ssz_encode_list(items, encode_item, limit)
            data_section.extend(encoded)
            next_offset += len(encoded)

    result = bytearray()
    result.extend(address_bytes)
    result.extend(offset_table)
    result.extend(data_section)
    return bytes(result)
229+
230+
231+
def ssz_encode_block_access_list(bal: BlockAccessList) -> Bytes:
    """
    Encode a BlockAccessList to bytes for hashing.

    The result is the encoding of ``bal.account_changes`` as a list bounded
    by ``MAX_ACCOUNTS``, wrapped in ``Bytes``.

    NOTE(review): only the list itself is serialized. If BlockAccessList is
    meant to be an SSZ container with a single list field, the container's
    leading 4-byte offset is missing for non-empty lists. Confirm against
    the type definition and the EIP.
    """
    return Bytes(
        ssz_encode_list(
            bal.account_changes,
            ssz_encode_account_changes,
            MAX_ACCOUNTS,
        )
    )
87243

88244

89245
def validate_bal_against_execution(
    bal: BlockAccessList,
    bal_builder: Optional['BALBuilder'] = None
) -> bool:
    """
    Check a BAL's structural invariants and, optionally, a builder's output.

    Parameters
    ----------
    bal :
        The Block Access List to validate.
    bal_builder :
        Optional BAL builder. When given, the BAL produced by its ``build()``
        must hash to the same value as ``bal``.

    Returns
    -------
    valid :
        ``False`` as soon as any check fails, ``True`` otherwise. The checks
        are: no slot appears in both the changes and the reads of one
        account; accounts are ordered by address; changed slots are ordered
        within each account; every list of changes is ordered by
        ``tx_index`` with no index above ``MAX_TXS - 1``; no new code is
        longer than ``MAX_CODE_SIZE``; and the hashes match when a builder
        is supplied.
    """
    highest_tx_index = MAX_TXS - 1
    accounts = bal.account_changes

    def ordered_and_bounded(entries) -> bool:
        # Entries must be in non-decreasing tx_index order and within range.
        indices = [entry.tx_index for entry in entries]
        if indices != sorted(indices):
            return False
        return all(index <= highest_tx_index for index in indices)

    # 1. A slot must not be listed as both changed and read (per EIP-7928).
    for account in accounts:
        written = {entry.slot for entry in account.storage_changes}
        if any(read.slot in written for read in account.storage_reads):
            return False

    # 2. Accounts must be ordered lexicographically by address.
    addresses = [account.address for account in accounts]
    if sorted(addresses) != addresses:
        return False

    # 3. Per-account ordering and bounds.
    for account in accounts:
        slots = [entry.slot for entry in account.storage_changes]
        if sorted(slots) != slots:
            return False

        change_lists = [entry.changes for entry in account.storage_changes]
        change_lists.append(account.balance_changes)
        change_lists.append(account.nonce_changes)
        change_lists.append(account.code_changes)
        if not all(ordered_and_bounded(entries) for entries in change_lists):
            return False

        if any(
            len(code_change.new_code) > MAX_CODE_SIZE
            for code_change in account.code_changes
        ):
            return False

    # 4. With a builder, the hashes of both BALs must agree.
    if bal_builder is None:
        return True
    return compute_bal_hash(bal) == compute_bal_hash(bal_builder.build())

0 commit comments

Comments
 (0)