Skip to content

Save block diffs #1

New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Draft
wants to merge 8 commits into
base: lithp/turbo-migration
Choose a base branch
from
Draft
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension


Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
113 changes: 113 additions & 0 deletions eth/db/account.py
Original file line number Diff line number Diff line change
Expand Up @@ -19,7 +19,9 @@
)

from eth_utils import (
big_endian_to_int,
encode_hex,
int_to_big_endian,
to_checksum_address,
to_tuple,
ValidationError,
Expand All @@ -41,6 +43,9 @@
from eth.db.batch import (
BatchDB,
)
from eth.db.block_diff import (
BlockDiff,
)
from eth.db.cache import (
CacheDB,
)
Expand All @@ -52,6 +57,7 @@
)
from eth.db.storage import (
AccountStorageDB,
StorageLookup,
)
from eth.db.typing import (
JournalDBCheckpoint,
Expand Down Expand Up @@ -260,6 +266,9 @@ def __init__(self, db: BaseAtomicDB, state_root: Hash32=BLANK_ROOT_HASH) -> None
self._dirty_accounts: Set[Address] = set()
self._root_hash_at_last_persist = state_root

self._dirty_account_rlps: Set[Address] = set()
self._deleted_accounts: Set[Address] = set()

@property
def state_root(self) -> Hash32:
return self._trie.root_hash
Expand Down Expand Up @@ -436,6 +445,7 @@ def delete_account(self, address: Address) -> None:
del self._journaltrie[address]

self._wipe_storage(address)
self._deleted_accounts.add(address)

def account_exists(self, address: Address) -> bool:
validate_canonical_address(address, title="Storage Address")
Expand Down Expand Up @@ -484,6 +494,8 @@ def _set_account(self, address: Address, account: Account) -> None:
rlp_account = rlp.encode(account, sedes=Account)
self._journaltrie[address] = rlp_account

self._dirty_account_rlps.add(address)

#
# Record and discard API
#
Expand Down Expand Up @@ -555,6 +567,8 @@ def persist(self) -> None:
# reset local storage trackers
self._account_stores = {}
self._dirty_accounts = set()
self._dirty_account_rlps = set()
self._deleted_accounts = set()

# persist accounts
self._validate_generated_root()
Expand All @@ -565,6 +579,105 @@ def persist(self) -> None:
self._batchdb.commit_to(write_batch, apply_deletes=False)
self._root_hash_at_last_persist = new_root_hash

def persist_returning_block_diff(self) -> BlockDiff:
"""
Persists, including a diff which can be used to unwind/replay the changes this block makes.
"""

block_diff = BlockDiff()

# 1. Grab all the changed accounts and their previous values

# pre-Byzantium make_storage_root is called at the end of every transaction, and
# it blows away all the changes. Create an old_trie here so we can peer into the
# state as it was at the beginning of the block.

old_trie = CacheDB(HashTrie(HexaryTrie(
self._raw_store_db, self._root_hash_at_last_persist, prune=False
)))

for deleted_address in self._deleted_accounts:
# TODO: this might raise a KeyError
old_value = old_trie[deleted_address]
block_diff.set_account_changed(deleted_address, old_value, b'')

for address in self._dirty_account_rlps:
old_value = old_trie[address]
new_value = self._get_encoded_account(address, from_journal=True)
block_diff.set_account_changed(address, old_value, new_value)

# 2. Grab all the changed storage items and their previous values.
dirty_stores = tuple(self._dirty_account_stores())
for address, store in dirty_stores:
diff = store.diff()

for key in diff.deleted_keys():
slot = big_endian_to_int(key)
current_slot_value = store.get(slot)
current_slot_value_bytes = int_to_big_endian(current_slot_value)
# TODO: Is b'' a valid value for a storage slot? 0 might be better
# TODO: this line is untested
block_diff.set_storage_changed(address, slot, current_slot_value_bytes, b'')

encoded_account = old_trie[address]
if encoded_account:
old_account = rlp.decode(encoded_account, sedes=Account)
else:
old_account = Account()
fresh_store = StorageLookup(
self._raw_store_db,
old_account.storage_root,
address
)

for key, new_value in diff.pending_items():
slot = big_endian_to_int(key)

# make a new StorageLookup because, pre-Byzantium, make_state_root is
# called at the end of every transaction, and making the state root blows
# away all changes. If we were to ask the store for the old value it would
# tell us the state as of the beginning of the last txn, not the state as
# of the beginnig of the block.

old_value_bytes = fresh_store.get(key)

block_diff.set_storage_changed(address, slot, old_value_bytes, new_value)

old_account_values: Dict[Address, bytes] = dict()
for address, _ in dirty_stores:
old_account_values[address] = self._get_encoded_account(address, from_journal=False)

# 3. Persist!
self.persist()

# 4. Grab the new storage roots
for address, _store in dirty_stores:
old_account_value = old_account_values[address]
new_account_value = self._get_encoded_account(address, from_journal=False)
block_diff.set_account_changed(address, old_account_value, new_account_value)

# 5. return the block diff
return block_diff

def _changed_accounts(self) -> DBDiff:
"""
Returns all the accounts which will be written to the db when persist() is called.

Careful! If some storage items have changed then the storage roots for some accounts
should also change but those accounts will not show up here unless something else about
them also changed.
"""
return self._journaltrie.diff()

def _changed_storage_items(self) -> Dict[Address, DBDiff]:
"""
Returns all the storage items which will be written to the db when persist() is called.
"""
return {
address: store.diff()
for address, store in self._dirty_account_stores()
}

def _validate_generated_root(self) -> None:
db_diff = self._journaldb.diff()
if len(db_diff):
Expand Down
133 changes: 133 additions & 0 deletions eth/db/block_diff.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,133 @@
from collections import defaultdict
from typing import (
Dict,
Iterable,
Optional,
Set,
Tuple,
)

from eth_typing import (
Address,
Hash32,
)
from eth_utils import (
big_endian_to_int,
to_tuple
)
import rlp

from eth.db.backends.base import BaseDB
from eth.db.schema import SchemaTurbo
from eth.rlp.accounts import Account


"""
TODO: Decide on the best interface for returning changes:
- diff.get_slot_change() -> [old, new]
- diff.get_slot_change(new=FAlse) -> old
- diff.get_slot_change(kind=BlockDiff.OLD) -> old
- diff.get_old_slot_value() & diff.get_new_slot_value()
"""


class BlockDiff:

def __init__(self) -> None:
self.old_account_values: Dict[Address, Optional[bytes]] = dict()
self.new_account_values: Dict[Address, Optional[bytes]] = dict()

SLOT_TO_VALUE = Dict[int, bytes]
self.old_storage_items: Dict[Address, SLOT_TO_VALUE] = defaultdict(dict)
self.new_storage_items: Dict[Address, SLOT_TO_VALUE] = defaultdict(dict)

def set_account_changed(self, address: Address, old_value: bytes, new_value: bytes) -> None:
self.old_account_values[address] = old_value
self.new_account_values[address] = new_value

def set_storage_changed(self, address: Address, slot: int,
old_value: bytes, new_value: bytes) -> None:
self.old_storage_items[address][slot] = old_value
self.new_storage_items[address][slot] = new_value

def get_changed_accounts(self) -> Set[Address]:
return set(self.old_account_values.keys()) | set(self.old_storage_items.keys())

@to_tuple
def get_changed_storage_items(self) -> Iterable[Tuple[Address, int, bytes, bytes]]:
for address in self.old_storage_items.keys():
new_items = self.new_storage_items[address]
old_items = self.old_storage_items[address]
for slot in old_items.keys():
yield address, slot, old_items[slot], new_items[slot]

def get_changed_slots(self, address: Address) -> Set[int]:
"""
Returns which slots changed for the given account.
"""
if address not in self.old_storage_items.keys():
return set()

return set(self.old_storage_items[address].keys())

def get_slot_change(self, address: Address, slot: int) -> Tuple[int, int]:
if address not in self.old_storage_items:
raise Exception(f'account {address} did not change')
old_values = self.old_storage_items[address]

if slot not in old_values:
raise Exception(f"{address}'s slot {slot} did not change")

new_values = self.new_storage_items[address]
return big_endian_to_int(old_values[slot]), big_endian_to_int(new_values[slot])

def get_account(self, address: Address, new: bool = True) -> bytes:
dictionary = self.new_account_values if new else self.old_account_values
return dictionary[address]

def get_decoded_account(self, address: Address, new: bool = True) -> Optional[Account]:
encoded = self.get_account(address, new)
if encoded == b'':
return None # this means the account used to or currently does not exist
return rlp.decode(encoded, sedes=Account)

@classmethod
def from_db(cls, db: BaseDB, block_hash: Hash32) -> 'BlockDiff':
"""
KeyError is thrown if a diff was not saved for the provided {block_hash}
"""

encoded_diff = db[SchemaTurbo.make_block_diff_lookup_key(block_hash)]
diff = rlp.decode(encoded_diff)

accounts, storage_items = diff

block_diff = cls()

for key, old, new in accounts:
block_diff.set_account_changed(key, old, new)

for key, slot, old, new in storage_items:
decoded_slot = big_endian_to_int(slot) # rlp.encode turns our ints into bytes
block_diff.set_storage_changed(key, decoded_slot, old, new)

return block_diff

def write_to(self, db: BaseDB, block_hash: Hash32) -> None:

# TODO: this should probably verify that the state roots have all been added

accounts = [
[address, self.old_account_values[address], self.new_account_values[address]]
for address in self.old_account_values.keys()
]

storage_items = self.get_changed_storage_items()

diff = [
accounts,
storage_items
]

encoded_diff = rlp.encode(diff)
db[SchemaTurbo.make_block_diff_lookup_key(block_hash)] = encoded_diff
5 changes: 5 additions & 0 deletions eth/db/schema.py
Original file line number Diff line number Diff line change
Expand Up @@ -62,6 +62,11 @@ def make_transaction_hash_to_block_lookup_key(transaction_hash: Hash32) -> bytes

class SchemaTurbo(SchemaV1):
current_schema_lookup_key: bytes = b'current-schema'
_block_diff_prefix = b'block-diff'

@classmethod
def make_block_diff_lookup_key(cls, block_hash: Hash32) -> bytes:
return cls._block_diff_prefix + b':' + block_hash


def get_schema(db: BaseDB) -> Schemas:
Expand Down
Loading