|
| 1 | +#!/usr/bin/env python |
| 2 | + |
| 3 | +""" |
| 4 | +Create a Trinity database by importing the current state of a Geth database |
| 5 | +""" |
| 6 | + |
| 7 | +import argparse |
| 8 | +import os |
| 9 | +import os.path |
| 10 | +from pathlib import Path |
| 11 | +import shutil |
| 12 | +import snappy |
| 13 | +import struct |
| 14 | + |
| 15 | +import plyvel |
| 16 | + |
| 17 | +from eth_utils import humanize_hash |
| 18 | +import rlp |
| 19 | + |
| 20 | +from eth.chains.mainnet import MAINNET_GENESIS_HEADER, MainnetChain |
| 21 | +from eth.db.backends.level import LevelDB |
| 22 | +from eth.rlp.headers import BlockHeader |
| 23 | + |
| 24 | + |
class GethKeys:
    """Key schema for geth's LevelDB, mirrored from
    https://github.com/ethereum/go-ethereum/blob/master/core/rawdb/schema.go
    """
    DatabaseVersion = b'DatabaseVersion'
    HeadBlock = b'LastBlock'

    headerPrefix = b'h'
    headerNumberPrefix = b'H'
    headerHashSuffix = b'n'

    @classmethod
    def header_hash_for_block_number(cls, block_number: int) -> bytes:
        """The key under which geth stores the canonical header hash for a block number."""
        return b''.join((
            cls.headerPrefix,
            struct.pack('>Q', block_number),  # big-endian uint64
            cls.headerHashSuffix,
        ))

    @classmethod
    def block_number_for_header_hash(cls, header_hash: bytes) -> bytes:
        """The key under which geth stores the block number for a header hash."""
        return cls.headerNumberPrefix + header_hash

    @classmethod
    def block_header(cls, block_number: int, header_hash: bytes) -> bytes:
        """The key under which geth stores the header with the given number and hash."""
        return b''.join((
            cls.headerPrefix,
            struct.pack('>Q', block_number),  # big-endian uint64
            header_hash,
        ))
| 49 | + |
| 50 | + |
class GethFreezerIndexEntry:
    """One 6-byte record of a geth freezer index file.

    Each record names the data file an item lives in and a byte offset
    into that file (used as the item's start or end bound by the caller).
    """

    def __init__(self, filenum: int, offset: int):
        self.filenum = filenum
        self.offset = offset

    @classmethod
    def from_bytes(cls, data: bytes) -> 'GethFreezerIndexEntry':
        assert len(data) == 6
        # Layout: big-endian uint16 file number, then big-endian uint32 offset.
        filenum = int.from_bytes(data[:2], 'big')
        offset = int.from_bytes(data[2:], 'big')
        return cls(filenum, offset)

    def __repr__(self):
        return 'IndexEntry(filenum={}, offset={})'.format(self.filenum, self.offset)
| 64 | + |
| 65 | + |
class GethFreezerTable:
    """Read-only view of one table of geth's "ancient" freezer store.

    A table consists of an index file (fixed 6-byte entries, see
    GethFreezerIndexEntry) plus a series of numbered data files that the
    index entries point into.
    """

    def __init__(self, ancient_path: str, name: str, uses_compression: bool) -> None:
        # ancient_path: directory holding the freezer files
        # name: table name, used as the filename stem (e.g. 'headers')
        # uses_compression: True if items are snappy-compressed on disk
        self.ancient_path = ancient_path
        self.name = name
        self.uses_compression = uses_compression
        print(f'opening freezer table. name={self.name}')

        self.index_file = open(os.path.join(ancient_path, self.index_file_name), 'rb')
        stat_result = os.stat(self.index_file.fileno())
        index_file_size = stat_result.st_size
        # The index must be a whole number of 6-byte entries.
        assert index_file_size % 6 == 0, index_file_size
        print(f'index_size={index_file_size} ({index_file_size // 6} entries)')
        # NOTE(review): geth's freezer_table.go appears to treat the first index
        # entry as a starting bound, so the usable item count may be one less
        # than this — confirm against freezer_table.go before relying on it.
        self.entries = index_file_size // 6

        # Log the first and last entries as a quick sanity check of the file.
        first_index_bytes = self.index_file.read(6)
        first_index = GethFreezerIndexEntry.from_bytes(first_index_bytes)
        print(f'first_index={first_index}')

        self.index_file.seek(-6, 2)  # 6 bytes back from EOF: the final entry
        last_index_bytes = self.index_file.read(6)
        last_index = GethFreezerIndexEntry.from_bytes(last_index_bytes)
        print(f'last_index={last_index}')

        # Data files are opened lazily and cached here, keyed by file number.
        self._data_files = dict()

    @property
    def index_file_name(self) -> str:
        # geth uses distinct extensions for compressed vs raw tables.
        suffix = 'cidx' if self.uses_compression else 'ridx'
        return f'{self.name}.{suffix}'

    def data_file_name(self, number: int) -> str:
        # Data files are numbered with four zero-padded digits.
        suffix = 'cdat' if self.uses_compression else 'rdat'
        return f'{self.name}.{number:04d}.{suffix}'

    def _data_file(self, number: int):
        # Return the open data file with the given number, opening and
        # caching it on first use.
        if number not in self._data_files:
            path = os.path.join(self.ancient_path, self.data_file_name(number))
            data_file = open(path, 'rb')
            self._data_files[number] = data_file

        return self._data_files[number]

    def get(self, number: int) -> bytes:
        """Return the raw (decompressed, if applicable) bytes of item ``number``.

        The item's bounds come from index entries ``number`` and ``number + 1``;
        the bytes between the two offsets are read out of the data file.
        """
        assert number < self.entries

        self.index_file.seek(number * 6)
        entry_bytes = self.index_file.read(6)
        start_entry = GethFreezerIndexEntry.from_bytes(entry_bytes)

        # What happens if we're trying to read the last item? Won't this fail?
        # Is there always one extra entry in the index file?
        self.index_file.seek((number+1) * 6)
        entry_bytes = self.index_file.read(6)
        end_entry = GethFreezerIndexEntry.from_bytes(entry_bytes)

        if start_entry.filenum != end_entry.filenum:
            # The item starts at the beginning of the next data file.
            # Duplicates logic from freezer_table.go:getBounds
            start_entry = GethFreezerIndexEntry(end_entry.filenum, offset=0)

        data_file = self._data_file(start_entry.filenum)
        data_file.seek(start_entry.offset)
        data = data_file.read(end_entry.offset - start_entry.offset)

        if not self.uses_compression:
            return data

        return snappy.decompress(data)

    def __del__(self) -> None:
        # Best-effort cleanup of every file handle this table opened.
        for f in self._data_files.values():
            f.close()
        self.index_file.close()
| 138 | + |
| 139 | + |
class GethDatabase:
    """Read-only wrapper around a geth LevelDB plus its ancient freezer tables."""

    def __init__(self, path):
        # Open the existing LevelDB store; never create a new one here.
        self.db = plyvel.DB(
            path,
            create_if_missing=False,
            error_if_exists=False,
            max_open_files=16
        )

        freezer_dir = os.path.join(path, 'ancient')
        self.ancient_hashes = GethFreezerTable(freezer_dir, 'hashes', False)
        self.ancient_headers = GethFreezerTable(freezer_dir, 'headers', True)

        if self.database_version != b'\x07':
            raise Exception(f'geth database version {self.database_version} is not supported')

    @property
    def database_version(self) -> bytes:
        # The version is stored rlp-encoded under a fixed key.
        return rlp.decode(self.db.get(GethKeys.DatabaseVersion))

    @property
    def last_block_hash(self) -> bytes:
        return self.db.get(GethKeys.HeadBlock)

    def block_num_for_hash(self, header_hash: bytes) -> int:
        packed = self.db.get(GethKeys.block_number_for_header_hash(header_hash))
        (number,) = struct.unpack('>Q', packed)
        return number

    def block_header(self, block_number: int, header_hash: bytes) -> BlockHeader:
        encoded = self.db.get(GethKeys.block_header(block_number, header_hash))
        if encoded is None:
            # Not in LevelDB; older headers live in the ancient store.
            encoded = self.ancient_headers.get(block_number)
        return rlp.decode(encoded, sedes=BlockHeader)

    def header_hash_for_block_number(self, block_number: int) -> bytes:
        found = self.db.get(GethKeys.header_hash_for_block_number(block_number))
        if found is None:
            # Fall back to the ancient store (freezer hashes table).
            return self.ancient_hashes.get(block_number)
        return found
| 186 | + |
| 187 | + |
def main(args):
    """Copy headers from a geth database into a Trinity database.

    Opens the geth LevelDB + freezer at ``args.gethdb``, verifies it is a
    mainnet database, then persists every canonical header from Trinity's
    current canonical head up to geth's chain tip into the Trinity LevelDB
    at ``args.destdb`` (initializing it from the mainnet genesis if absent).
    """
    # Open geth database
    gethdb = GethDatabase(args.gethdb)

    last_block = gethdb.last_block_hash
    last_block_num = gethdb.block_num_for_hash(last_block)
    print('geth database opened')
    print(f'found chain tip: header_hash={humanize_hash(last_block)} block_number={last_block_num}')

    print(f'header: {len(gethdb.block_header(last_block_num, last_block))}')

    # Sanity check: the source must be a mainnet database. Raise explicitly
    # instead of asserting so the check survives `python -O`.
    genesis_hash = gethdb.header_hash_for_block_number(0)
    genesis_header = gethdb.block_header(0, genesis_hash)
    print(f'genesis header: {genesis_header}')
    if genesis_header != MAINNET_GENESIS_HEADER:
        raise Exception('geth genesis header does not match the mainnet genesis')

    first_hash = gethdb.header_hash_for_block_number(1)
    first_block = gethdb.block_header(1, first_hash)
    print(f'first header: {first_block}')

    # Create (or reopen) the Trinity database.
    db_already_existed = os.path.exists(args.destdb)

    leveldb = LevelDB(db_path=Path(args.destdb), max_open_files=16)

    if not db_already_existed:
        print('Trinity database did not already exist, initializing it now')
        chain = MainnetChain.from_genesis_header(leveldb, MAINNET_GENESIS_HEADER)
    else:
        chain = MainnetChain(leveldb)

    headerdb = chain.headerdb

    canonical_head = headerdb.get_canonical_head()
    print(f'starting copy from trinity\'s canonical head: {canonical_head}')

    # Verify the trinity database matches what geth has before copying.
    geth_header = gethdb.block_header(canonical_head.block_number, canonical_head.hash)
    if geth_header.hash != canonical_head.hash:
        raise Exception('trinity canonical head was not found in the geth database')

    # Copy headers, inclusive of both the current head and geth's tip.
    for i in range(canonical_head.block_number, last_block_num + 1):
        header_hash = gethdb.header_hash_for_block_number(i)
        header = gethdb.block_header(i, header_hash)

        headerdb.persist_header(header)

        if i % 1000 == 0:
            # Periodic progress report.
            print(f'current canonical header: {headerdb.get_canonical_head()}')
| 241 | + |
| 242 | + |
| 243 | +if __name__ == "__main__": |
| 244 | + parser = argparse.ArgumentParser() |
| 245 | + parser.add_argument('-gethdb', type=str, required=True) |
| 246 | + parser.add_argument('-destdb', type=str, required=True) |
| 247 | + args = parser.parse_args() |
| 248 | + |
| 249 | + main(args) |
0 commit comments