5
5
"""
6
6
7
7
import argparse
8
+ import logging
8
9
import os
9
10
import os .path
10
11
from pathlib import Path
16
17
17
18
from eth_utils import humanize_hash
18
19
import rlp
20
+ from rlp .sedes import CountableList
19
21
20
22
from eth .chains .mainnet import MAINNET_GENESIS_HEADER , MainnetChain
21
23
from eth .db .backends .level import LevelDB
22
24
from eth .rlp .headers import BlockHeader
25
+ from eth .rlp .transactions import BaseTransactionFields
26
+
27
+
28
+ logger = logging .getLogger ('importer' )
23
29
24
30
25
31
class GethKeys :
@@ -31,6 +37,8 @@ class GethKeys:
31
37
headerNumberPrefix = b'H'
32
38
headerHashSuffix = b'n'
33
39
40
+ blockBodyPrefix = b'b'
41
+
34
42
@classmethod
35
43
def header_hash_for_block_number (cls , block_number : int ) -> bytes :
36
44
"The key to get the hash of the header with the given block number"
@@ -47,6 +55,11 @@ def block_header(cls, block_number: int, header_hash: bytes) -> bytes:
47
55
packed_block_number = struct .pack ('>Q' , block_number )
48
56
return cls .headerPrefix + packed_block_number + header_hash
49
57
58
+ @classmethod
59
+ def block_body (cls , block_number : int , header_hash : bytes ) -> bytes :
60
+ packed_block_number = struct .pack ('>Q' , block_number )
61
+ return cls .blockBodyPrefix + packed_block_number + header_hash
62
+
50
63
51
64
class GethFreezerIndexEntry :
52
65
def __init__ (self , filenum : int , offset : int ):
@@ -68,23 +81,23 @@ def __init__(self, ancient_path, name, uses_compression):
68
81
self .ancient_path = ancient_path
69
82
self .name = name
70
83
self .uses_compression = uses_compression
71
- print (f'opening freezer table. name={ self .name } ' )
84
+ logger . debug (f'opening freezer table. name={ self .name } ' )
72
85
73
86
self .index_file = open (os .path .join (ancient_path , self .index_file_name ), 'rb' )
74
87
stat_result = os .stat (self .index_file .fileno ())
75
88
index_file_size = stat_result .st_size
76
89
assert index_file_size % 6 == 0 , index_file_size
77
- print (f'index_size={ index_file_size } ({ index_file_size // 6 } entries)' )
90
+ logger . debug (f'index_size={ index_file_size } ({ index_file_size // 6 } entries)' )
78
91
self .entries = index_file_size // 6
79
92
80
93
first_index_bytes = self .index_file .read (6 )
81
94
first_index = GethFreezerIndexEntry .from_bytes (first_index_bytes )
82
- print (f'first_index={ first_index } ' )
95
+ logger . debug (f'first_index={ first_index } ' )
83
96
84
97
self .index_file .seek (- 6 , 2 )
85
98
last_index_bytes = self .index_file .read (6 )
86
99
last_index = GethFreezerIndexEntry .from_bytes (last_index_bytes )
87
- print (f'last_index={ last_index } ' )
100
+ logger . debug (f'last_index={ last_index } ' )
88
101
89
102
self ._data_files = dict ()
90
103
@@ -137,6 +150,17 @@ def __del__(self) -> None:
137
150
self .index_file .close ()
138
151
139
152
153
class BlockBody(rlp.Serializable):
    """This is how geth stores block bodies.

    Decoded from the raw RLP read out of the LevelDB ``b``-prefixed keys or
    the ancient ``bodies`` freezer table.
    """
    # Field order must match geth's on-disk RLP encoding exactly:
    # a list of transactions followed by a list of uncle headers.
    fields = [
        ('transactions', CountableList(BaseTransactionFields)),
        ('uncles', CountableList(BlockHeader)),
    ]

    def __repr__(self) -> str:
        return f'BlockBody(txns={self.transactions}, uncles={self.uncles})'
162
+
163
+
140
164
class GethDatabase :
141
165
def __init__ (self , path ):
142
166
self .db = plyvel .DB (
@@ -149,6 +173,7 @@ def __init__(self, path):
149
173
ancient_path = os .path .join (path , 'ancient' )
150
174
self .ancient_hashes = GethFreezerTable (ancient_path , 'hashes' , False )
151
175
self .ancient_headers = GethFreezerTable (ancient_path , 'headers' , True )
176
+ self .ancient_bodies = GethFreezerTable (ancient_path , 'bodies' , True )
152
177
153
178
if self .database_version != b'\x07 ' :
154
179
raise Exception (f'geth database version { self .database_version } is not supported' )
@@ -166,8 +191,10 @@ def block_num_for_hash(self, header_hash: bytes) -> int:
166
191
raw_num = self .db .get (GethKeys .block_number_for_header_hash (header_hash ))
167
192
return struct .unpack ('>Q' , raw_num )[0 ]
168
193
169
- def block_header (self , block_number : int , header_hash : bytes ) -> BlockHeader :
170
- # This also needs to check the ancient db
194
+ def block_header (self , block_number : int , header_hash : bytes = None ) -> BlockHeader :
195
+ if header_hash is None :
196
+ header_hash = self .header_hash_for_block_number (block_number )
197
+
171
198
raw_data = self .db .get (GethKeys .block_header (block_number , header_hash ))
172
199
if raw_data is not None :
173
200
return rlp .decode (raw_data , sedes = BlockHeader )
@@ -184,28 +211,34 @@ def header_hash_for_block_number(self, block_number: int) -> bytes:
184
211
185
212
return self .ancient_hashes .get (block_number )
186
213
214
+ def block_body (self , block_number : int , header_hash : bytes = None ):
215
+ if header_hash is None :
216
+ header_hash = self .header_hash_for_block_number (block_number )
217
+
218
+ raw_data = self .db .get (GethKeys .block_body (block_number , header_hash ))
219
+ if raw_data is not None :
220
+ return rlp .decode (raw_data , sedes = BlockBody )
221
+
222
+ raw_data = self .ancient_bodies .get (block_number )
223
+ return rlp .decode (raw_data , sedes = BlockBody )
224
+
187
225
188
226
def main (args ):
189
- # Open geth database
227
+ # 1. Open Geth database
228
+
190
229
gethdb = GethDatabase (args .gethdb )
191
230
192
231
last_block = gethdb .last_block_hash
193
232
last_block_num = gethdb .block_num_for_hash (last_block )
194
- print ('geth database opened' )
195
- print (f'found chain tip: header_hash={ humanize_hash (last_block )} block_number={ last_block_num } ' )
196
-
197
- print (f'header: { len (gethdb .block_header (last_block_num , last_block ))} ' )
233
+ logger .info ('geth database opened' )
234
+ logger .info (f'found geth chain tip: header_hash={ humanize_hash (last_block )} block_number={ last_block_num } ' )
198
235
199
236
genesis_hash = gethdb .header_hash_for_block_number (0 )
200
237
genesis_header = gethdb .block_header (0 , genesis_hash )
201
- print (f'genesis header: { genesis_header } ' )
202
238
assert genesis_header == MAINNET_GENESIS_HEADER
239
+ logger .info (f'geth genesis header matches expected genesis' )
203
240
204
- first_hash = gethdb .header_hash_for_block_number (1 )
205
- first_block = gethdb .block_header (1 , first_hash )
206
- print (f'first header: { first_block } ' )
207
-
208
- # Create trinity database
241
+ # 2. Create trinity database
209
242
210
243
db_already_existed = False
211
244
if os .path .exists (args .destdb ):
@@ -214,33 +247,51 @@ def main(args):
214
247
leveldb = LevelDB (db_path = Path (args .destdb ), max_open_files = 16 )
215
248
216
249
if not db_already_existed :
217
- print (f'Trinity database did not already exist, initializing it now' )
250
+ logger . info (f'Trinity database did not already exist, initializing it now' )
218
251
chain = MainnetChain .from_genesis_header (leveldb , MAINNET_GENESIS_HEADER )
219
252
else :
220
253
chain = MainnetChain (leveldb )
221
254
222
255
headerdb = chain .headerdb
223
256
257
+ # 3. Import headers + bodies
258
+
224
259
canonical_head = headerdb .get_canonical_head ()
225
- print (f'starting copy from trinity\' s canonical head: { canonical_head } ' )
260
+ logger . info (f'starting import from trinity\' s canonical head: { canonical_head } ' )
226
261
227
- # verify the trinity database matches what geth has
262
+ # fail fast if geth disagrees with trinity's canonical head
228
263
geth_header = gethdb .block_header (canonical_head .block_number , canonical_head .hash )
229
264
assert geth_header .hash == canonical_head .hash
230
265
231
266
for i in range (canonical_head .block_number , last_block_num + 1 ):
232
267
header_hash = gethdb .header_hash_for_block_number (i )
233
268
header = gethdb .block_header (i , header_hash )
234
269
235
- headerdb .persist_header (header )
270
+ body = gethdb .block_body (i )
271
+ block_class = chain .get_vm_class (header ).get_block_class ()
272
+ block = block_class (header , body .transactions , body .uncles )
273
+ chain .chaindb .persist_block (block )
236
274
237
275
if i % 1000 == 0 :
238
- print (f'current canonical header: { headerdb .get_canonical_head ()} ' )
276
+ logger . debug (f'current canonical header: { headerdb .get_canonical_head ()} ' )
239
277
240
- return
278
+ # some final checks, these should never fail
279
+ canonical_head = headerdb .get_canonical_head ()
280
+ geth_last_block_hash = gethdb .last_block_hash
281
+ geth_last_block_num = gethdb .block_num_for_hash (geth_last_block_hash )
282
+ assert canonical_head .hash == geth_last_block_hash
283
+ assert canonical_head .block_number == geth_last_block_num
284
+
285
+ logger .info ('finished importing headers + bodies' )
241
286
242
287
243
288
if __name__ == "__main__" :
289
+ logging .basicConfig (
290
+ level = logging .DEBUG ,
291
+ format = '%(asctime)s.%(msecs)03d %(levelname)s: %(message)s' ,
292
+ datefmt = '%H:%M:%S'
293
+ )
294
+
244
295
parser = argparse .ArgumentParser ()
245
296
parser .add_argument ('-gethdb' , type = str , required = True )
246
297
parser .add_argument ('-destdb' , type = str , required = True )
0 commit comments