|
| 1 | +#!/usr/bin/env python3 |
| 2 | +# Copyright (c) 2020 The Bitcoin Core developers |
| 3 | +# Distributed under the MIT software license, see the accompanying |
| 4 | +# file COPYING or http://www.opensource.org/licenses/mit-license.php. |
| 5 | +""" |
| 6 | +Utilities for working directly with the wallet's BDB database file |
| 7 | +
|
| 8 | +This is specific to the configuration of BDB used in this project: |
| 9 | + - pagesize: 4096 bytes |
| 10 | + - Outer database contains single subdatabase named 'main' |
| 11 | + - btree |
| 12 | + - btree leaf pages |
| 13 | +
|
| 14 | +Each key-value pair is two entries in a btree leaf. The first is the key, the one that follows |
| 15 | +is the value. And so on. Note that the entry data is itself not in the correct order. Instead |
| 16 | +entry offsets are stored in the correct order and those offsets are needed to then retrieve |
| 17 | +the data itself. |
| 18 | +
|
| 19 | +Page format can be found in BDB source code dbinc/db_page.h |
| 20 | +This only implements the deserialization of btree metadata pages and normal btree pages. Overflow |
| 21 | +pages are not implemented but may be needed in the future if dealing with wallets with large |
| 22 | +transactions. |
| 23 | +
|
| 24 | +`db_dump -da wallet.dat` is useful to see the data in a wallet.dat BDB file |
| 25 | +""" |
| 26 | + |
| 27 | +import binascii |
| 28 | +import struct |
| 29 | + |
# Important constants
# Size in bytes of every page in the file; the file is read in chunks of this size
PAGESIZE = 4096
# Indexes (in units of pages) of the two metadata pages that get sanity checked
OUTER_META_PAGE = 0
INNER_META_PAGE = 2

# Page type values, as found in the type byte of a page header
BTREE_INTERNAL = 3
BTREE_LEAF = 5
BTREE_META = 9

# Some magic numbers for sanity checking
# A metadata page must carry exactly this magic and this version to be accepted
BTREE_MAGIC = 0x053162
DB_VERSION = 9
| 43 | + |
def dump_leaf_page(data):
    """Deserialize one btree page and return its contents as a dict.

    The first 26 bytes are unpacked as the page header. For leaf pages the
    header is followed by one 2-byte offset per entry; each offset points at
    a 3-byte item header (length, type) followed by `length` bytes of data.
    All integers are unpacked with struct's default (native) byte order.

    Args:
        data: The raw bytes of a single page.

    Returns:
        None if the page is a btree internal page (same header, but nothing
        of interest inside). Otherwise a dict with the keys 'pgno',
        'prev_pgno', 'next_pgno', 'entries', 'hf_offset', 'level', 'pg_type'
        and 'entry_offsets'. 'entries' is a list with one dict per entry,
        in entry_offsets order, holding 'offset', 'len', 'pg_type' and 'data'.

    Raises:
        AssertionError: The page is neither a btree internal nor a btree leaf page.
        struct.error: The page is too short for its header, offsets or an item header.
    """
    _, pgno, prev_pgno, next_pgno, entries, hf_offset, level, pg_type = struct.unpack_from('QIIIHHBB', data, 0)

    # Look at the page type before touching anything else: on a page of another
    # type the entry count is not meaningful and must not be used to size a read.
    if pg_type == BTREE_INTERNAL:
        # Skip internal pages. These are the internal nodes of the btree and don't contain anything relevant to us
        return None

    # Raised explicitly (rather than with an assert statement) so that the check survives `python -O`
    if pg_type != BTREE_LEAF:
        raise AssertionError('A non-btree leaf page has been encountered while dumping leaves')

    # The offsets array is stored in the correct entry order, the entry data itself is not
    entry_offsets = struct.unpack_from('{}H'.format(entries), data, 26)

    page_info = {
        'pgno': pgno,
        'prev_pgno': prev_pgno,
        'next_pgno': next_pgno,
        'entries': [],
        'hf_offset': hf_offset,
        'level': level,
        'pg_type': pg_type,
        'entry_offsets': entry_offsets,
    }

    for offset in entry_offsets:
        # Item header: 2-byte data length followed by a 1-byte item type
        e_len, e_type = struct.unpack_from('HB', data, offset)
        page_info['entries'].append({
            'offset': offset,
            'len': e_len,
            'pg_type': e_type,
            'data': data[offset + 3:offset + 3 + e_len],
        })

    return page_info
| 78 | + |
def dump_meta_page(page):
    """Deserialize a btree metadata page into a dict and sanity check it.

    The first 72 bytes are unpacked as the general metadata, the following
    440 bytes (up to byte 512) as the btree-specific metadata. All integers
    are unpacked with struct's default (native) byte order.

    Args:
        page: The raw bytes of a metadata page; at least 512 bytes are read.

    Returns:
        A dict with the keys 'pgno', 'magic', 'version', 'pagesize',
        'encrypt_alg', 'pg_type', 'metaflags', 'free', 'last_pgno', 'nparts',
        'key_count', 'record_count', 'flags', 'uid', 'minkey', 're_len',
        're_pad', 'root', 'crypto_magic', 'iv' and 'chksum'. 'uid', 'iv' and
        'chksum' are hex-encoded bytes, everything else is an int.

    Raises:
        AssertionError: The magic, page type or version is not the expected one.
        struct.error: The page is too short.
    """
    # general metadata
    (_, pgno, magic, version, pagesize, encrypt_alg, pg_type, metaflags, _, free,
     last_pgno, nparts, key_count, record_count, flags, uid) = struct.unpack_from('QIIIIBBBBIIIIII20s', page, 0)
    metadata = {
        'pgno': pgno,
        'magic': magic,
        'version': version,
        'pagesize': pagesize,
        'encrypt_alg': encrypt_alg,
        'pg_type': pg_type,
        'metaflags': metaflags,
        'free': free,
        'last_pgno': last_pgno,
        'nparts': nparts,
        'key_count': key_count,
        'record_count': record_count,
        'flags': flags,
        'uid': binascii.hexlify(uid),
    }

    # These checks are the whole point of calling this function on a file, so they are
    # raised explicitly instead of using assert statements, which `python -O` removes.
    if magic != BTREE_MAGIC:
        raise AssertionError('bdb magic does not match bdb btree magic')
    if pg_type != BTREE_META:
        raise AssertionError('Metadata page is not a btree metadata page')
    if version != DB_VERSION:
        raise AssertionError('Database too new')

    # btree metadata, starting right after the 72 bytes of general metadata
    _, minkey, re_len, re_pad, root, _, crypto_magic, _, iv, chksum = struct.unpack_from('IIIII368sI12s16s20s', page, 72)
    metadata['minkey'] = minkey
    metadata['re_len'] = re_len
    metadata['re_pad'] = re_pad
    metadata['root'] = root
    metadata['crypto_magic'] = crypto_magic
    metadata['iv'] = binascii.hexlify(iv)
    metadata['chksum'] = binascii.hexlify(chksum)
    return metadata
| 117 | + |
def extract_kv_pairs(page_data):
    """Turn the entries of a deserialized leaf page into a key -> value dict.

    Entries come in pairs: the entry at an even index is a key and the entry
    right after it is that key's value. A final key that has no value after
    it is mapped to b''. If a key occurs more than once, the last occurrence wins.

    Args:
        page_data: A dict as returned by dump_leaf_page; only page_data['entries']
            is read, and of each entry only its 'data' item.

    Returns:
        A dict mapping each key's data to the data of the entry that follows it.
    """
    records = [item['data'] for item in page_data['entries']]
    if len(records) % 2 == 1:
        # Dangling key at the end of the page: pair it with an empty value
        records.append(b'')
    return dict(zip(records[0::2], records[1::2]))
| 130 | + |
def dump_bdb_kv(filename):
    """Extract the key-value pairs of the BDB file given in filename.

    The file is read in PAGESIZE chunks. The pages at OUTER_META_PAGE and
    INNER_META_PAGE are sanity checked with dump_meta_page, then every page
    after INNER_META_PAGE is passed to dump_leaf_page and the pairs of all
    leaf pages are merged into one dict (internal pages are skipped).

    Args:
        filename: Path of the BDB file to read.

    Returns:
        A dict mapping key bytes to value bytes. If a key shows up on more
        than one page, the value from the later page wins.

    Raises:
        IndexError: The file has fewer pages than INNER_META_PAGE + 1.
        Anything raised by dump_meta_page or dump_leaf_page is passed through.
    """
    # Read in the BDB file and start deserializing it
    with open(filename, 'rb') as f:
        # Two-argument iter() keeps calling f.read until it returns b'' at end of file
        pages = list(iter(lambda: f.read(PAGESIZE), b''))

    # Sanity check the meta pages
    dump_meta_page(pages[OUTER_META_PAGE])
    dump_meta_page(pages[INNER_META_PAGE])

    # Fetch the kv pairs from the leaf pages
    kv = {}
    for page in pages[INNER_META_PAGE + 1:]:
        info = dump_leaf_page(page)
        if info is not None:
            # Merge in place; rebuilding the dict for every page would be quadratic
            kv.update(extract_kv_pairs(info))
    return kv
0 commit comments