Skip to content

Commit a6a830c

Browse files
committed
Also import block bodies, add logging
1 parent 22b91b8 commit a6a830c

File tree

1 file changed

+74
-23
lines changed

1 file changed

+74
-23
lines changed

scripts/gethimport.py

Lines changed: 74 additions & 23 deletions
Original file line numberDiff line numberDiff line change
@@ -5,6 +5,7 @@
55
"""
66

77
import argparse
8+
import logging
89
import os
910
import os.path
1011
from pathlib import Path
@@ -16,10 +17,15 @@
1617

1718
from eth_utils import humanize_hash
1819
import rlp
20+
from rlp.sedes import CountableList
1921

2022
from eth.chains.mainnet import MAINNET_GENESIS_HEADER, MainnetChain
2123
from eth.db.backends.level import LevelDB
2224
from eth.rlp.headers import BlockHeader
25+
from eth.rlp.transactions import BaseTransactionFields
26+
27+
28+
logger = logging.getLogger('importer')
2329

2430

2531
class GethKeys:
@@ -31,6 +37,8 @@ class GethKeys:
3137
headerNumberPrefix = b'H'
3238
headerHashSuffix = b'n'
3339

40+
blockBodyPrefix = b'b'
41+
3442
@classmethod
3543
def header_hash_for_block_number(cls, block_number: int) -> bytes:
3644
"The key to get the hash of the header with the given block number"
@@ -47,6 +55,11 @@ def block_header(cls, block_number: int, header_hash: bytes) -> bytes:
4755
packed_block_number = struct.pack('>Q', block_number)
4856
return cls.headerPrefix + packed_block_number + header_hash
4957

58+
@classmethod
def block_body(cls, block_number: int, header_hash: bytes) -> bytes:
    "The key to get the body of the block with the given number and header hash"
    # key = body prefix, then the block number as a big-endian unsigned
    # 64-bit integer, then the header hash
    number_be = struct.pack('>Q', block_number)
    return b''.join((cls.blockBodyPrefix, number_be, header_hash))
62+
5063

5164
class GethFreezerIndexEntry:
5265
def __init__(self, filenum: int, offset: int):
@@ -68,23 +81,23 @@ def __init__(self, ancient_path, name, uses_compression):
6881
self.ancient_path = ancient_path
6982
self.name = name
7083
self.uses_compression = uses_compression
71-
print(f'opening freezer table. name={self.name}')
84+
logger.debug(f'opening freezer table. name={self.name}')
7285

7386
self.index_file = open(os.path.join(ancient_path, self.index_file_name), 'rb')
7487
stat_result = os.stat(self.index_file.fileno())
7588
index_file_size = stat_result.st_size
7689
assert index_file_size % 6 == 0, index_file_size
77-
print(f'index_size={index_file_size} ({index_file_size // 6} entries)')
90+
logger.debug(f'index_size={index_file_size} ({index_file_size // 6} entries)')
7891
self.entries = index_file_size // 6
7992

8093
first_index_bytes = self.index_file.read(6)
8194
first_index = GethFreezerIndexEntry.from_bytes(first_index_bytes)
82-
print(f'first_index={first_index}')
95+
logger.debug(f'first_index={first_index}')
8396

8497
self.index_file.seek(-6, 2)
8598
last_index_bytes = self.index_file.read(6)
8699
last_index = GethFreezerIndexEntry.from_bytes(last_index_bytes)
87-
print(f'last_index={last_index}')
100+
logger.debug(f'last_index={last_index}')
88101

89102
self._data_files = dict()
90103

@@ -137,6 +150,17 @@ def __del__(self) -> None:
137150
self.index_file.close()
138151

139152

153+
class BlockBody(rlp.Serializable):
    "Serializable form of a block body as geth stores it: transactions plus uncles"
    # NOTE(review): the field order presumably mirrors geth's stored body
    # encoding (transactions first, uncle headers second) -- confirm before
    # reordering or adding fields.
    fields = [
        ('transactions', CountableList(BaseTransactionFields)),
        ('uncles', CountableList(BlockHeader)),
    ]

    def __repr__(self) -> str:
        # show both decoded lists in full
        return f'BlockBody(txns={self.transactions}, uncles={self.uncles})'
162+
163+
140164
class GethDatabase:
141165
def __init__(self, path):
142166
self.db = plyvel.DB(
@@ -149,6 +173,7 @@ def __init__(self, path):
149173
ancient_path = os.path.join(path, 'ancient')
150174
self.ancient_hashes = GethFreezerTable(ancient_path, 'hashes', False)
151175
self.ancient_headers = GethFreezerTable(ancient_path, 'headers', True)
176+
self.ancient_bodies = GethFreezerTable(ancient_path, 'bodies', True)
152177

153178
if self.database_version != b'\x07':
154179
raise Exception(f'geth database version {self.database_version} is not supported')
@@ -166,8 +191,10 @@ def block_num_for_hash(self, header_hash: bytes) -> int:
166191
raw_num = self.db.get(GethKeys.block_number_for_header_hash(header_hash))
167192
return struct.unpack('>Q', raw_num)[0]
168193

169-
def block_header(self, block_number: int, header_hash: bytes) -> BlockHeader:
170-
# This also needs to check the ancient db
194+
def block_header(self, block_number: int, header_hash: bytes = None) -> BlockHeader:
195+
if header_hash is None:
196+
header_hash = self.header_hash_for_block_number(block_number)
197+
171198
raw_data = self.db.get(GethKeys.block_header(block_number, header_hash))
172199
if raw_data is not None:
173200
return rlp.decode(raw_data, sedes=BlockHeader)
@@ -184,28 +211,34 @@ def header_hash_for_block_number(self, block_number: int) -> bytes:
184211

185212
return self.ancient_hashes.get(block_number)
186213

214+
def block_body(self, block_number: int, header_hash: bytes = None):
    """
    Return the decoded BlockBody for the given block number.

    When header_hash is omitted it is looked up from the block number. The
    body is read from the key-value database first; if no entry exists under
    that key, the raw body is taken from the ancient bodies table instead.
    """
    if header_hash is None:
        header_hash = self.header_hash_for_block_number(block_number)

    body_key = GethKeys.block_body(block_number, header_hash)
    encoded = self.db.get(body_key)
    if encoded is None:
        # nothing under this key in the key-value store, fall back to the
        # ancient table, which is indexed by block number alone
        encoded = self.ancient_bodies.get(block_number)

    return rlp.decode(encoded, sedes=BlockBody)
224+
187225

188226
def main(args):
189-
# Open geth database
227+
# 1. Open Geth database
228+
190229
gethdb = GethDatabase(args.gethdb)
191230

192231
last_block = gethdb.last_block_hash
193232
last_block_num = gethdb.block_num_for_hash(last_block)
194-
print('geth database opened')
195-
print(f'found chain tip: header_hash={humanize_hash(last_block)} block_number={last_block_num}')
196-
197-
print(f'header: {len(gethdb.block_header(last_block_num, last_block))}')
233+
logger.info('geth database opened')
234+
logger.info(f'found geth chain tip: header_hash={humanize_hash(last_block)} block_number={last_block_num}')
198235

199236
genesis_hash = gethdb.header_hash_for_block_number(0)
200237
genesis_header = gethdb.block_header(0, genesis_hash)
201-
print(f'genesis header: {genesis_header}')
202238
assert genesis_header == MAINNET_GENESIS_HEADER
239+
logger.info(f'geth genesis header matches expected genesis')
203240

204-
first_hash = gethdb.header_hash_for_block_number(1)
205-
first_block = gethdb.block_header(1, first_hash)
206-
print(f'first header: {first_block}')
207-
208-
# Create trinity database
241+
# 2. Create trinity database
209242

210243
db_already_existed = False
211244
if os.path.exists(args.destdb):
@@ -214,33 +247,51 @@ def main(args):
214247
leveldb = LevelDB(db_path=Path(args.destdb), max_open_files=16)
215248

216249
if not db_already_existed:
217-
print(f'Trinity database did not already exist, initializing it now')
250+
logger.info(f'Trinity database did not already exist, initializing it now')
218251
chain = MainnetChain.from_genesis_header(leveldb, MAINNET_GENESIS_HEADER)
219252
else:
220253
chain = MainnetChain(leveldb)
221254

222255
headerdb = chain.headerdb
223256

257+
# 3. Import headers + bodies
258+
224259
canonical_head = headerdb.get_canonical_head()
225-
print(f'starting copy from trinity\'s canonical head: {canonical_head}')
260+
logger.info(f'starting import from trinity\'s canonical head: {canonical_head}')
226261

227-
# verify the trinity database matches what geth has
262+
# fail fast if geth disagrees with trinity's canonical head
228263
geth_header = gethdb.block_header(canonical_head.block_number, canonical_head.hash)
229264
assert geth_header.hash == canonical_head.hash
230265

231266
for i in range(canonical_head.block_number, last_block_num + 1):
232267
header_hash = gethdb.header_hash_for_block_number(i)
233268
header = gethdb.block_header(i, header_hash)
234269

235-
headerdb.persist_header(header)
270+
body = gethdb.block_body(i)
271+
block_class = chain.get_vm_class(header).get_block_class()
272+
block = block_class(header, body.transactions, body.uncles)
273+
chain.chaindb.persist_block(block)
236274

237275
if i % 1000 == 0:
238-
print(f'current canonical header: {headerdb.get_canonical_head()}')
276+
logger.debug(f'current canonical header: {headerdb.get_canonical_head()}')
239277

240-
return
278+
# some final checks, these should never fail
279+
canonical_head = headerdb.get_canonical_head()
280+
geth_last_block_hash = gethdb.last_block_hash
281+
geth_last_block_num = gethdb.block_num_for_hash(geth_last_block_hash)
282+
assert canonical_head.hash == geth_last_block_hash
283+
assert canonical_head.block_number == geth_last_block_num
284+
285+
logger.info('finished importing headers + bodies')
241286

242287

243288
if __name__ == "__main__":
289+
logging.basicConfig(
290+
level=logging.DEBUG,
291+
format='%(asctime)s.%(msecs)03d %(levelname)s: %(message)s',
292+
datefmt='%H:%M:%S'
293+
)
294+
244295
parser = argparse.ArgumentParser()
245296
parser.add_argument('-gethdb', type=str, required=True)
246297
parser.add_argument('-destdb', type=str, required=True)

0 commit comments

Comments
 (0)