Skip to content
This repository was archived by the owner on Sep 8, 2025. It is now read-only.

Commit f89fa61

Browse files
authored
Merge pull request #1918 from carver/key-logger
Add Database for tracking key accesses
2 parents 5d1b907 + 74dbaa8 commit f89fa61

File tree

11 files changed

+241
-21
lines changed

11 files changed

+241
-21
lines changed

Makefile

Lines changed: 0 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -39,7 +39,6 @@ coverage:
3939
open htmlcov/index.html
4040

4141
build-docs:
42-
pip install -e .[doc]
4342
cd docs/; sphinx-build -W -T -E . _build/html
4443

4544
doctest:

docs/guides/understanding_the_mining_process.rst

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -385,4 +385,4 @@ zero value transfer transaction.
385385
... )
386386

387387
>>> chain.mine_block(mix_hash=mix_hash, nonce=nonce)
388-
<ByzantiumBlock(#Block #1)>
388+
<ByzantiumBlock(#Block #1-0x41f6..2913)>

eth/abc.py

Lines changed: 12 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -403,13 +403,24 @@ def delete(self, key: bytes) -> None:
403403
...
404404

405405

406+
class AtomicWriteBatchAPI(DatabaseAPI):
407+
"""
408+
The readable/writeable object returned by an atomic database when we start building
409+
a batch of writes to commit.
410+
411+
Reads from this database will observe writes made during batching,
412+
but the writes will not actually persist until this object is committed.
413+
"""
414+
pass
415+
416+
406417
class AtomicDatabaseAPI(DatabaseAPI):
407418
"""
408419
Like ``BatchDB``, but immediately write out changes if they are
409420
not in an ``atomic_batch()`` context.
410421
"""
411422
@abstractmethod
412-
def atomic_batch(self) -> ContextManager[DatabaseAPI]:
423+
def atomic_batch(self) -> ContextManager[AtomicWriteBatchAPI]:
413424
"""
414425
Return a :class:`~typing.ContextManager` to write an atomic batch to the database.
415426
"""

eth/db/accesslog.py

Lines changed: 113 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,113 @@
1+
from contextlib import contextmanager
2+
import logging
3+
from typing import (
4+
Iterator,
5+
FrozenSet,
6+
Set,
7+
)
8+
9+
from eth.abc import (
10+
AtomicWriteBatchAPI,
11+
AtomicDatabaseAPI,
12+
DatabaseAPI,
13+
)
14+
from eth.db.backends.base import (
15+
BaseDB,
16+
)
17+
from eth.db.atomic import (
18+
BaseAtomicDB,
19+
)
20+
21+
22+
class KeyAccessLoggerDB(BaseDB):
23+
"""
24+
Wraps around a database, and tracks all the keys that were read since initialization.
25+
"""
26+
27+
logger = logging.getLogger("eth.db.KeyAccessLoggerDB")
28+
29+
def __init__(self, wrapped_db: DatabaseAPI, log_missing_keys: bool=True) -> None:
30+
"""
31+
:param log_missing_keys: True if a key is added to :attr:`keys_read` even if the
32+
key/value does not exist in the database.
33+
"""
34+
self.wrapped_db = wrapped_db
35+
self._keys_read: Set[bytes] = set()
36+
self._log_missing_keys = log_missing_keys
37+
38+
@property
39+
def keys_read(self) -> FrozenSet[bytes]:
40+
# Return an immutable copy so callers can't modify the internal set of keys
41+
return frozenset(self._keys_read)
42+
43+
def __getitem__(self, key: bytes) -> bytes:
44+
try:
45+
result = self.wrapped_db.__getitem__(key)
46+
except KeyError:
47+
if self._log_missing_keys:
48+
self._keys_read.add(key)
49+
raise
50+
else:
51+
self._keys_read.add(key)
52+
return result
53+
54+
def __setitem__(self, key: bytes, value: bytes) -> None:
55+
self.wrapped_db[key] = value
56+
57+
def __delitem__(self, key: bytes) -> None:
58+
del self.wrapped_db[key]
59+
60+
def _exists(self, key: bytes) -> bool:
61+
does_exist = key in self.wrapped_db
62+
if does_exist or self._log_missing_keys:
63+
self._keys_read.add(key)
64+
return does_exist
65+
66+
67+
class KeyAccessLoggerAtomicDB(BaseAtomicDB):
68+
"""
69+
Wraps around an atomic database, and tracks all the keys that were read since initialization.
70+
"""
71+
logger = logging.getLogger("eth.db.KeyAccessLoggerAtomicDB")
72+
73+
def __init__(self, wrapped_db: AtomicDatabaseAPI, log_missing_keys: bool=True) -> None:
74+
"""
75+
:param log_missing_keys: True if a key is added to :attr:`keys_read` even if the
76+
key/value does not exist in the database.
77+
"""
78+
self.wrapped_db = wrapped_db
79+
self._keys_read: Set[bytes] = set()
80+
self._log_missing_keys = log_missing_keys
81+
82+
@property
83+
def keys_read(self) -> FrozenSet[bytes]:
84+
# Return an immutable copy so callers can't modify the internal set of keys
85+
return frozenset(self._keys_read)
86+
87+
def __getitem__(self, key: bytes) -> bytes:
88+
try:
89+
result = self.wrapped_db.__getitem__(key)
90+
except KeyError:
91+
if self._log_missing_keys:
92+
self._keys_read.add(key)
93+
raise
94+
else:
95+
self._keys_read.add(key)
96+
return result
97+
98+
def __setitem__(self, key: bytes, value: bytes) -> None:
99+
self.wrapped_db[key] = value
100+
101+
def __delitem__(self, key: bytes) -> None:
102+
del self.wrapped_db[key]
103+
104+
def _exists(self, key: bytes) -> bool:
105+
does_exist = key in self.wrapped_db
106+
if does_exist or self._log_missing_keys:
107+
self._keys_read.add(key)
108+
return does_exist
109+
110+
@contextmanager
111+
def atomic_batch(self) -> Iterator[AtomicWriteBatchAPI]:
112+
with self.wrapped_db.atomic_batch() as readable_batch:
113+
yield readable_batch

eth/db/atomic.py

Lines changed: 5 additions & 4 deletions
Original file line numberDiff line numberDiff line change
@@ -9,6 +9,7 @@
99
)
1010

1111
from eth.abc import (
12+
AtomicWriteBatchAPI,
1213
DatabaseAPI,
1314
)
1415

@@ -17,7 +18,7 @@
1718
DBDiffTracker,
1819
DiffMissingError,
1920
)
20-
from eth.db.backends.base import BaseDB, BaseAtomicDB
21+
from eth.db.backends.base import BaseAtomicDB, BaseDB
2122
from eth.db.backends.memory import MemoryDB
2223

2324

@@ -46,12 +47,12 @@ def _exists(self, key: bytes) -> bool:
4647
return key in self.wrapped_db
4748

4849
@contextmanager
49-
def atomic_batch(self) -> Iterator['AtomicDBWriteBatch']:
50+
def atomic_batch(self) -> Iterator[AtomicWriteBatchAPI]:
5051
with AtomicDBWriteBatch._commit_unless_raises(self) as readable_batch:
5152
yield readable_batch
5253

5354

54-
class AtomicDBWriteBatch(BaseDB):
55+
class AtomicDBWriteBatch(BaseDB, AtomicWriteBatchAPI):
5556
"""
5657
This is returned by a BaseAtomicDB during an atomic_batch, to provide a temporary view
5758
of the database, before commit.
@@ -112,7 +113,7 @@ def _exists(self, key: bytes) -> bool:
112113

113114
@classmethod
114115
@contextmanager
115-
def _commit_unless_raises(cls, write_target_db: DatabaseAPI) -> Iterator['AtomicDBWriteBatch']:
116+
def _commit_unless_raises(cls, write_target_db: DatabaseAPI) -> Iterator[AtomicWriteBatchAPI]:
116117
"""
117118
Commit all writes inside the context, unless an exception was raised.
118119

eth/db/backends/level.py

Lines changed: 7 additions & 4 deletions
Original file line numberDiff line numberDiff line change
@@ -2,13 +2,16 @@
22
import logging
33
from pathlib import Path
44
from typing import (
5-
Generator,
5+
Iterator,
66
TYPE_CHECKING,
77
)
88

99
from eth_utils import ValidationError
1010

11-
from eth.abc import DatabaseAPI
11+
from eth.abc import (
12+
AtomicWriteBatchAPI,
13+
DatabaseAPI,
14+
)
1215
from eth.db.diff import (
1316
DBDiffTracker,
1417
DiffMissingError,
@@ -67,7 +70,7 @@ def __delitem__(self, key: bytes) -> None:
6770
self.db.delete(key)
6871

6972
@contextmanager
70-
def atomic_batch(self) -> Generator['LevelDBWriteBatch', None, None]:
73+
def atomic_batch(self) -> Iterator[AtomicWriteBatchAPI]:
7174
with self.db.write_batch(transaction=True) as atomic_batch:
7275
readable_batch = LevelDBWriteBatch(self, atomic_batch)
7376
try:
@@ -76,7 +79,7 @@ def atomic_batch(self) -> Generator['LevelDBWriteBatch', None, None]:
7679
readable_batch.decommission()
7780

7881

79-
class LevelDBWriteBatch(BaseDB):
82+
class LevelDBWriteBatch(BaseDB, AtomicWriteBatchAPI):
8083
"""
8184
A native leveldb write batch does not permit reads on the in-progress data.
8285
This class fills that gap, by tracking the in-progress diff, and adding

eth/rlp/blocks.py

Lines changed: 6 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -2,10 +2,13 @@
22
Type
33
)
44

5+
from eth_utils import (
6+
humanize_hash,
7+
)
8+
59
from eth._utils.datatypes import (
610
Configurable,
711
)
8-
912
from eth.abc import (
1013
BlockAPI,
1114
SignedTransactionAPI,
@@ -29,4 +32,5 @@ def __repr__(self) -> str:
2932
return f'<{self.__class__.__name__}(#{str(self)})>'
3033

3134
def __str__(self) -> str:
32-
return f"Block #{self.number}"
35+
clipped_hash = humanize_hash(self.hash)
36+
return f"Block #{self.number}-0x{clipped_hash}"

eth/tools/mining.py

Lines changed: 7 additions & 7 deletions
Original file line numberDiff line numberDiff line change
@@ -1,19 +1,19 @@
1+
from eth.abc import (
2+
BlockAPI,
3+
VirtualMachineAPI,
4+
)
15
from eth.consensus import (
26
pow,
37
)
48

5-
from eth.rlp.blocks import (
6-
BaseBlock,
7-
)
8-
99

10-
class POWMiningMixin:
10+
class POWMiningMixin(VirtualMachineAPI):
1111
"""
1212
A VM that does POW mining as well. Should be used only in tests, when we
1313
need to programmatically populate a ChainDB.
1414
"""
15-
def finalize_block(self, block: BaseBlock) -> BaseBlock:
16-
block = super().finalize_block(block) # type: ignore
15+
def finalize_block(self, block: BlockAPI) -> BlockAPI:
16+
block = super().finalize_block(block)
1717
nonce, mix_hash = pow.mine_pow_nonce(
1818
block.number, block.header.mining_hash, block.header.difficulty)
1919
return block.copy(header=block.header.copy(nonce=nonce, mix_hash=mix_hash))

newsfragments/1918.internal.rst

Lines changed: 3 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,3 @@
1+
Added :class:`~eth.db.accesslog.KeyAccessLoggerDB` and its atomic twin; faster ``make
2+
validate-docs`` (but you have to remember to ``pip install -e .[doc]`` yourself); ``str(block)`` now
3+
includes the first 2 and last 2 bytes of the block hash.

tests/database/test_accesslog.py

Lines changed: 69 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,69 @@
1+
from hypothesis import (
2+
given,
3+
strategies as st,
4+
)
5+
import pytest
6+
7+
from eth.db.accesslog import (
8+
KeyAccessLoggerDB,
9+
KeyAccessLoggerAtomicDB,
10+
)
11+
from eth.db.backends.memory import MemoryDB
12+
13+
14+
@given(st.lists(st.binary()))
15+
@pytest.mark.parametrize('DB', (
16+
lambda: KeyAccessLoggerAtomicDB(MemoryDB()),
17+
lambda: KeyAccessLoggerAtomicDB(MemoryDB(), log_missing_keys=False),
18+
lambda: KeyAccessLoggerAtomicDB(MemoryDB(), log_missing_keys=True),
19+
lambda: KeyAccessLoggerDB(MemoryDB()),
20+
lambda: KeyAccessLoggerDB(MemoryDB(), log_missing_keys=False),
21+
lambda: KeyAccessLoggerDB(MemoryDB(), log_missing_keys=True),
22+
))
23+
def test_log_accesses(DB, keys):
24+
db = DB()
25+
assert len(db.keys_read) == 0
26+
for key in keys:
27+
db[key] = b'placeholder' # value doesn't matter
28+
assert db[key] == b'placeholder'
29+
30+
for key in keys:
31+
assert key in db.keys_read
32+
33+
34+
@pytest.mark.parametrize('DB', (
35+
lambda: KeyAccessLoggerAtomicDB(MemoryDB()),
36+
lambda: KeyAccessLoggerAtomicDB(MemoryDB(), log_missing_keys=True),
37+
lambda: KeyAccessLoggerDB(MemoryDB()),
38+
lambda: KeyAccessLoggerDB(MemoryDB(), log_missing_keys=True),
39+
))
40+
def test_logs_missing_keys(DB):
41+
db_logs_missing = DB()
42+
assert len(db_logs_missing.keys_read) == 0
43+
assert b'exist-test' not in db_logs_missing
44+
45+
assert b'exist-test' in db_logs_missing.keys_read
46+
47+
with pytest.raises(KeyError, match='get-test'):
48+
db_logs_missing[b'get-test']
49+
50+
assert b'get-test' in db_logs_missing.keys_read
51+
assert len(db_logs_missing.keys_read) == 2
52+
53+
54+
@pytest.mark.parametrize('DB', (
55+
lambda: KeyAccessLoggerAtomicDB(MemoryDB(), log_missing_keys=False),
56+
lambda: KeyAccessLoggerDB(MemoryDB(), log_missing_keys=False),
57+
))
58+
def test_dont_log_missing_keys(DB):
59+
db_doesnt_log_missing = DB()
60+
assert len(db_doesnt_log_missing.keys_read) == 0
61+
assert b'exist-test' not in db_doesnt_log_missing
62+
63+
assert b'exist-test' not in db_doesnt_log_missing.keys_read
64+
65+
with pytest.raises(KeyError, match='get-test'):
66+
db_doesnt_log_missing[b'get-test']
67+
68+
assert b'get-test' not in db_doesnt_log_missing.keys_read
69+
assert len(db_doesnt_log_missing.keys_read) == 0

0 commit comments

Comments
 (0)