Skip to content

Commit 61a9a37

Browse files
authored
Base implementation of RocksDB support (#1416)
* Add RocksDB backend * Ensure we don't reuse database dir for different database engines * Cache rocksdb installation * PR feedback * fix bash scripts * fix types
1 parent e48b279 commit 61a9a37

22 files changed

+563
-192
lines changed

.circleci/config.yml

Lines changed: 25 additions & 7 deletions
Original file line numberDiff line numberDiff line change
@@ -23,29 +23,48 @@ common: &common
2323
when: on_fail
2424
- restore_cache:
2525
keys:
26-
- cache-{{ .Environment.CIRCLE_JOB }}-{{ checksum "setup.py" }}-{{ checksum "tox.ini" }}
26+
- cache-v1-python-{{ arch }}-{{ .Environment.CIRCLE_JOB }}-{{ checksum "setup.py" }}-{{ checksum "tox.ini" }}
27+
- restore_cache:
28+
keys:
29+
- cache-v1-rocksdb-{{ arch }}-{{ .Environment.CIRCLE_JOB }}-{{ checksum ".circleci/install_rocksdb.sh" }}
30+
- run:
31+
name: install rocksdb
32+
command: sudo sh ./.circleci/install_rocksdb.sh
33+
- save_cache:
34+
paths:
35+
- ~/rocksdb/
36+
key: cache-v1-rocksdb-{{ arch }}-{{ .Environment.CIRCLE_JOB }}-{{ checksum ".circleci/install_rocksdb.sh" }}
2737
- run:
2838
name: install dependencies
2939
command: pip install --user tox
3040
- run:
3141
name: run tox
32-
command: ~/.local/bin/tox
42+
command: ~/.local/bin/tox -r
3343
- save_cache:
3444
paths:
3545
- .hypothesis
36-
- .tox
3746
- ~/.cache/pip
3847
- ~/.local
3948
- ./eggs
40-
key: cache-{{ .Environment.CIRCLE_JOB }}-{{ checksum "setup.py" }}-{{ checksum "tox.ini" }}
49+
key: cache-v1-python-{{ arch }}-{{ .Environment.CIRCLE_JOB }}-{{ checksum "setup.py" }}-{{ checksum "tox.ini" }}
4150

4251
geth_steps: &geth_steps
4352
working_directory: ~/repo
4453
steps:
4554
- checkout
4655
- restore_cache:
4756
keys:
48-
- cache-{{ .Environment.CIRCLE_JOB }}-{{ checksum "setup.py" }}-{{ checksum "tox.ini" }}
57+
- cache-v1-python-{{ arch }}-{{ .Environment.CIRCLE_JOB }}-{{ checksum "setup.py" }}-{{ checksum "tox.ini" }}
58+
- restore_cache:
59+
keys:
60+
- cache-v2-rocksdb-{{ arch }}-{{ .Environment.CIRCLE_JOB }}-{{ checksum ".circleci/install_rocksdb.sh" }}
61+
- run:
62+
name: install rocksdb
63+
command: sudo sh ./.circleci/install_rocksdb.sh
64+
- save_cache:
65+
paths:
66+
- ~/rocksdb/
67+
key: cache-v2-rocksdb-{{ arch }}-{{ .Environment.CIRCLE_JOB }}-{{ checksum ".circleci/install_rocksdb.sh" }}
4968
- run:
5069
name: install dependencies
5170
command: pip install --user tox
@@ -79,7 +98,7 @@ geth_steps: &geth_steps
7998
- ./eggs
8099
- ~/.ethash
81100
- ~/.py-geth
82-
key: cache-{{ .Environment.CIRCLE_JOB }}-{{ checksum "setup.py" }}-{{ checksum "tox.ini" }}
101+
key: cache-v1-python-{{ arch }}-{{ .Environment.CIRCLE_JOB }}-{{ checksum "setup.py" }}-{{ checksum "tox.ini" }}
83102

84103
jobs:
85104
py35-lint:
@@ -266,7 +285,6 @@ jobs:
266285
environment:
267286
TOXENV: py36-trinity-integration
268287
py36-trinity-lightchain_integration:
269-
<<: *common
270288
<<: *geth_steps
271289
docker:
272290
- image: circleci/python:3.6

.circleci/install_rocksdb.sh

Lines changed: 23 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,23 @@
1+
#!/usr/bin/env bash
#
# Build and install the RocksDB shared library for CI.
#
# The CI config invokes this file through `sh`, so it sticks to POSIX shell
# constructs. The source checkout lives in ROCKSDB_DIR so that the directory
# can be cached between runs; when the built library is already present there,
# the clone/build steps are skipped and only the /usr/lib symlinks are
# recreated.

set -o errexit
set -o nounset

ROCKSDB_VERSION="5.8.8"
ROCKSDB_DIR="/home/circleci/rocksdb"
ROCKSDB_LIB="${ROCKSDB_DIR}/librocksdb.so.${ROCKSDB_VERSION}"

# Compression / flag libraries RocksDB is built against.
sudo apt-get install -y liblz4-dev libsnappy-dev libgflags-dev zlib1g-dev libbz2-dev libzstd-dev

if [ ! -d "${ROCKSDB_DIR}" ]; then
    git clone https://github.com/facebook/rocksdb "${ROCKSDB_DIR}"
fi

if [ ! -f "${ROCKSDB_LIB}" ]; then
    # Each step is its own statement on purpose: errexit is NOT triggered by a
    # failing non-final command of an `a && b && c` list, so a chained form
    # would let a failed checkout/build fall through to the symlink step.
    cd "${ROCKSDB_DIR}/"
    git checkout "v${ROCKSDB_VERSION}"
    sudo make install-shared INSTALL_PATH=/usr
fi

# Make sure every soname the loader may look for resolves to the built library.
for link_name in librocksdb.so.5.8 librocksdb.so.5 librocksdb.so; do
    if [ ! -f "/usr/lib/${link_name}" ]; then
        ln -fs "${ROCKSDB_LIB}" "/usr/lib/${link_name}"
    fi
done

.circleci/merge_pr.sh

Lines changed: 3 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -1,5 +1,8 @@
11
#!/usr/bin/env bash
22

3+
set -o errexit
4+
set -o nounset
5+
36
if [[ -n "${CIRCLE_PR_NUMBER}" ]]; then
47
PR_INFO_URL=https://api.github.com/repos/$CIRCLE_PROJECT_USERNAME/$CIRCLE_PROJECT_REPONAME/pulls/$CIRCLE_PR_NUMBER
58
PR_BASE_BRANCH=$(curl -L "$PR_INFO_URL" | python -c 'import json, sys; obj = json.load(sys.stdin); sys.stdout.write(obj["base"]["ref"])')

docs/guides/trinity/quickstart.rst

Lines changed: 7 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -26,6 +26,12 @@ we need to install the ``python3-pip`` package through the following command.
2626
2727
apt-get install python3-pip
2828
29+
Trinity also requires RocksDB which can be installed with the following command:
30+
31+
.. code:: sh
32+
33+
apt-get install liblz4-dev librocksdb5.8
34+
2935
.. note::
3036
.. include:: /fragments/virtualenv_explainer.rst
3137

@@ -42,7 +48,7 @@ First, install LevelDB and the latest Python 3 with brew:
4248

4349
.. code:: sh
4450
45-
brew install python3 leveldb
51+
brew install python3 leveldb rocksdb
4652
4753
.. note::
4854
.. include:: /fragments/virtualenv_explainer.rst

eth/db/backends/level.py

Lines changed: 6 additions & 4 deletions
Original file line numberDiff line numberDiff line change
@@ -2,7 +2,7 @@
22
import logging
33
from pathlib import Path
44
from typing import (
5-
Generator,
5+
Iterator,
66
TYPE_CHECKING,
77
)
88

@@ -27,10 +27,9 @@
2727
class LevelDB(BaseAtomicDB):
2828
logger = logging.getLogger("eth.db.backends.LevelDB")
2929

30-
# Creates db as a class variable to avoid level db lock error
3130
def __init__(self, db_path: Path = None) -> None:
3231
if not db_path:
33-
raise TypeError("Please specifiy a valid path for your database.")
32+
raise TypeError("The LevelDB backend requires a database path")
3433
try:
3534
with catch_and_ignore_import_warning():
3635
import plyvel # noqa: F811
@@ -54,10 +53,13 @@ def _exists(self, key: bytes) -> bool:
5453
return self.db.get(key) is not None
5554

5655
def __delitem__(self, key: bytes) -> None:
56+
v = self.db.get(key)
57+
if v is None:
58+
raise KeyError(key)
5759
self.db.delete(key)
5860

5961
@contextmanager
60-
def atomic_batch(self) -> Generator['LevelDBWriteBatch', None, None]:
62+
def atomic_batch(self) -> Iterator['LevelDBWriteBatch']:
6163
with self.db.write_batch(transaction=True) as atomic_batch:
6264
readable_batch = LevelDBWriteBatch(self, atomic_batch)
6365
try:

eth/db/backends/rocks.py

Lines changed: 139 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,139 @@
1+
from contextlib import contextmanager
2+
import logging
3+
from pathlib import Path
4+
from typing import (
5+
Iterator,
6+
TYPE_CHECKING,
7+
)
8+
9+
from eth_utils import ValidationError
10+
11+
from eth.db.diff import (
12+
DBDiffTracker,
13+
DiffMissingError,
14+
)
15+
from .base import (
16+
BaseAtomicDB,
17+
BaseDB,
18+
)
19+
20+
if TYPE_CHECKING:
21+
import rocksdb # noqa: F401
22+
23+
24+
class RocksDB(BaseAtomicDB):
    """
    Database backend that stores key/value pairs through the
    ``python-rocksdb`` bindings and offers atomic write batches.
    """
    logger = logging.getLogger("eth.db.backends.RocksDB")

    def __init__(self,
                 db_path: Path = None,
                 opts: 'rocksdb.Options' = None,
                 read_only: bool = False) -> None:
        """
        Open the database located at ``db_path``.

        :param db_path: location of the database; must be provided
        :param opts: options passed to ``rocksdb.DB``; when omitted, options
            with ``create_if_missing=True`` are used
        :param read_only: forwarded to ``rocksdb.DB``

        :raise TypeError: if no ``db_path`` is given
        :raise ImportError: if the ``rocksdb`` module cannot be imported
        """
        if not db_path:
            raise TypeError("The RocksDB backend requires a database path")
        # Imported lazily so that merely importing this module does not
        # require the native library to be installed.
        try:
            import rocksdb  # noqa: F811
        except ImportError:
            raise ImportError(
                "RocksDB requires the python-rocksdb library which is not "
                "available for import."
            )

        if opts is None:
            opts = rocksdb.Options(create_if_missing=True)
        self.db_path = db_path
        self.db = rocksdb.DB(str(db_path), opts, read_only=read_only)

    def __getitem__(self, key: bytes) -> bytes:
        """
        Return the value stored under ``key``.

        :raise KeyError: if the database has no value for ``key``
        """
        value = self.db.get(key)
        if value is None:
            raise KeyError(key)
        return value

    def __setitem__(self, key: bytes, value: bytes) -> None:
        """
        Store ``value`` under ``key``.
        """
        self.db.put(key, value)

    def _exists(self, key: bytes) -> bool:
        """
        Return ``True`` if the database holds a value for ``key``.
        """
        return self.db.get(key) is not None

    def __delitem__(self, key: bytes) -> None:
        """
        Remove ``key`` from the database.

        :raise KeyError: if the database has no value for ``key``
        """
        # Use a real read rather than ``key_may_exist``: per the
        # python-rocksdb documentation that call is only a hint and may
        # answer True for a key that is absent, which would silently skip
        # the KeyError. This also matches the check used by ``_exists``.
        if self.db.get(key) is None:
            raise KeyError(key)
        self.db.delete(key)

    @contextmanager
    def atomic_batch(self) -> Iterator['RocksDBWriteBatch']:
        """
        Context manager yielding a readable write batch.

        The collected changes are written to the database in a single
        ``write`` call once the ``with`` body finishes. If the body raises,
        the exception propagates and nothing is written.
        """
        import rocksdb  # noqa: F811
        batch = rocksdb.WriteBatch()

        readable_batch = RocksDBWriteBatch(self, batch)

        try:
            yield readable_batch
        finally:
            # Whatever happened in the body, the batch wrapper must not be
            # usable once the context has been left.
            readable_batch.decommission()

        # Only reached when the body completed without raising.
        self.db.write(batch)
78+
79+
class RocksDBWriteBatch(BaseDB):
80+
"""
81+
A native rocksdb write batch does not permit reads on the in-progress data.
82+
This class fills that gap, by tracking the in-progress diff, and adding
83+
a read interface.
84+
"""
85+
logger = logging.getLogger("eth.db.backends.RocksDBWriteBatch")
86+
87+
def __init__(self, original_read_db: BaseDB, write_batch: 'rocksdb.WriteBatch') -> None:
88+
self._original_read_db = original_read_db
89+
self._write_batch = write_batch
90+
# keep track of the temporary changes made
91+
self._track_diff = DBDiffTracker()
92+
93+
def __getitem__(self, key: bytes) -> bytes:
94+
if self._track_diff is None:
95+
raise ValidationError("Cannot get data from a write batch, out of context")
96+
97+
try:
98+
changed_value = self._track_diff[key]
99+
except DiffMissingError as missing:
100+
if missing.is_deleted:
101+
raise KeyError(key)
102+
else:
103+
return self._original_read_db[key]
104+
else:
105+
return changed_value
106+
107+
def __setitem__(self, key: bytes, value: bytes) -> None:
108+
if self._track_diff is None:
109+
raise ValidationError("Cannot set data from a write batch, out of context")
110+
111+
self._write_batch.put(key, value)
112+
self._track_diff[key] = value
113+
114+
def _exists(self, key: bytes) -> bool:
115+
if self._track_diff is None:
116+
raise ValidationError("Cannot test data existance from a write batch, out of context")
117+
118+
try:
119+
self._track_diff[key]
120+
except DiffMissingError as missing:
121+
if missing.is_deleted:
122+
return False
123+
else:
124+
return key in self._original_read_db
125+
else:
126+
return True
127+
128+
def __delitem__(self, key: bytes) -> None:
129+
if self._track_diff is None:
130+
raise ValidationError("Cannot delete data from a write batch, out of context")
131+
132+
self._write_batch.delete(key)
133+
del self._track_diff[key]
134+
135+
def decommission(self) -> None:
136+
"""
137+
Prevent any further actions to be taken on this write batch, called after leaving context
138+
"""
139+
self._track_diff = None

scripts/benchmark/utils/chain_plumbing.py

Lines changed: 6 additions & 6 deletions
Original file line numberDiff line numberDiff line change
@@ -29,8 +29,8 @@
2929
from eth.chains.base import (
3030
MiningChain,
3131
)
32-
from eth.db.backends.level import (
33-
LevelDB,
32+
from eth.db.backends.rocks import (
33+
RocksDB,
3434
)
3535
from eth.vm.base import (
3636
BaseVM,
@@ -91,14 +91,14 @@
9191
def get_chain(vm: Type[BaseVM], genesis_state: GenesisState) -> Iterable[MiningChain]:
9292

9393
with tempfile.TemporaryDirectory() as temp_dir:
94-
level_db_obj = LevelDB(Path(temp_dir))
95-
level_db_chain = build(
94+
base_db = RocksDB(Path(temp_dir))
95+
chain = build(
9696
MiningChain,
9797
fork_at(vm, constants.GENESIS_BLOCK_NUMBER),
9898
disable_pow_check(),
99-
genesis(db=level_db_obj, params=GENESIS_PARAMS, state=genesis_state)
99+
genesis(db=base_db, params=GENESIS_PARAMS, state=genesis_state)
100100
)
101-
yield level_db_chain
101+
yield chain
102102

103103

104104
def get_all_chains(genesis_state: GenesisState=DEFAULT_GENESIS_STATE) -> Iterable[MiningChain]:

setup.py

Lines changed: 3 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -27,6 +27,7 @@
2727
"eth-hash[pysha3];implementation_name=='cpython'",
2828
"eth-hash[pycryptodome];implementation_name=='pypy'",
2929
"plyvel==1.0.5",
30+
"python-rocksdb==0.6.9",
3031
],
3132
'p2p': [
3233
"asyncio-cancel-token==0.1.0a2",
@@ -43,6 +44,7 @@
4344
"coincurve>=8.0.0,<9.0.0",
4445
"ipython>=6.2.1,<7.0.0",
4546
"plyvel==1.0.5",
47+
"python-rocksdb==0.6.9",
4648
"web3==4.4.1",
4749
"lahja==0.9.0",
4850
"termcolor>=1.1.0,<2.0.0",
@@ -57,7 +59,7 @@
5759
"pytest-asyncio==0.9.0",
5860
"pytest-cov==2.5.1",
5961
"pytest-watch>=4.1.0,<5",
60-
"pytest-xdist==1.18.1",
62+
"pytest-xdist==1.23.2",
6163
# only needed for p2p
6264
"pytest-asyncio-network-simulator==0.1.0a2;python_version>='3.6'",
6365
],

tests/database/test_base_atomic_db.py

Lines changed: 4 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -4,14 +4,17 @@
44

55
from eth.db.atomic import AtomicDB
66
from eth.db.backends.level import LevelDB
7+
from eth.db.backends.rocks import RocksDB
78

89

9-
@pytest.fixture(params=['atomic', 'level'])
10+
@pytest.fixture(params=['atomic', 'level', 'rocks'])
1011
def atomic_db(request, tmpdir):
1112
if request.param == 'atomic':
1213
return AtomicDB()
1314
elif request.param == 'level':
1415
return LevelDB(db_path=tmpdir.mkdir("level_db_path"))
16+
elif request.param == 'rocks':
17+
return RocksDB(db_path=tmpdir.mkdir("rocks_db_path"))
1518
else:
1619
raise ValueError("Unexpected database type: {}".format(request.param))
1720

0 commit comments

Comments
 (0)