Skip to content

Commit 4b418a9

Browse files
committed
test: Add test_framework/bdb.py module for inspecting bdb files
For upgrade tests and possibly other tests, it is useful to inspect the bdb file for the wallet (i.e. the wallet.dat file). test_framework/bdb.py is an implementation of bdb file deserialization specific to Bitcoin Core's usage.
1 parent 092fc43 commit 4b418a9

File tree

1 file changed

+152
-0
lines changed
  • test/functional/test_framework

1 file changed

+152
-0
lines changed

test/functional/test_framework/bdb.py

Lines changed: 152 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,152 @@
1+
#!/usr/bin/env python3
2+
# Copyright (c) 2020 The Bitcoin Core developers
3+
# Distributed under the MIT software license, see the accompanying
4+
# file COPYING or http://www.opensource.org/licenses/mit-license.php.
5+
"""
6+
Utilities for working directly with the wallet's BDB database file
7+
8+
This is specific to the configuration of BDB used in this project:
9+
- pagesize: 4096 bytes
10+
- Outer database contains single subdatabase named 'main'
11+
- btree
12+
- btree leaf pages
13+
14+
Each key-value pair is two entries in a btree leaf. The first is the key, the one that follows
15+
is the value. And so on. Note that the entry data is itself not in the correct order. Instead
16+
entry offsets are stored in the correct order and those offsets are needed to then retrieve
17+
the data itself.
18+
19+
Page format can be found in BDB source code dbinc/db_page.h
20+
This only implements the deserialization of btree metadata pages and normal btree pages. Overflow
21+
pages are not implemented but may be needed in the future if dealing with wallets with large
22+
transactions.
23+
24+
`db_dump -da wallet.dat` is useful to see the data in a wallet.dat BDB file
25+
"""
26+
27+
import binascii
28+
import struct
29+
30+
# File layout: size of every page read from the file, and the indices of the
# two metadata pages that get sanity checked
PAGESIZE = 4096
OUTER_META_PAGE = 0
INNER_META_PAGE = 2

# Values of the page type field in a page header
BTREE_INTERNAL = 3
BTREE_LEAF = 5
BTREE_META = 9

# Magic and version that a metadata page is expected to carry
BTREE_MAGIC = 0x053162
DB_VERSION = 9
43+
44+
# Deserializes a btree leaf page into a dict.
# Btree internal pages have the same header; for those, None is returned.
# Any other page type trips an assertion.
#
# For a leaf page the returned dict holds the header fields ('pgno', 'prev_pgno',
# 'next_pgno', 'hf_offset', 'level', 'pg_type') plus:
# - 'entry_offsets': tuple of the offset of each entry within the page, in stored order
# - 'entries': list with one dict per offset, each with 'offset', 'len', 'pg_type' and 'data'
def dump_leaf_page(data):
    # The page header is 26 bytes. The leading 8 byte field is unpacked but not used.
    _, pgno, prev_pgno, next_pgno, num_entries, hf_offset, level, pg_type = struct.unpack_from('QIIIHHBB', data, 0)

    # Decide what kind of page this is before touching anything past the header, so that an
    # unexpected page type is reported by the assertion below rather than by a parsing error.
    if pg_type == BTREE_INTERNAL:
        # Skip internal pages. These are the internal nodes of the btree and don't contain anything relevant to us
        return None

    assert pg_type == BTREE_LEAF, 'A non-btree leaf page has been encountered while dumping leaves'

    # The table of 2 byte entry offsets directly follows the header
    entry_offsets = struct.unpack_from('{}H'.format(num_entries), data, 26)

    page_info = {
        'pgno': pgno,
        'prev_pgno': prev_pgno,
        'next_pgno': next_pgno,
        'entries': [],
        'hf_offset': hf_offset,
        'level': level,
        'pg_type': pg_type,
        'entry_offsets': entry_offsets,
    }

    for offset in entry_offsets:
        # Each entry starts with a 3 byte header (2 byte length, 1 byte type) followed by the data.
        # The entry type gets its own name so that it does not shadow the page type.
        e_len, e_type = struct.unpack_from('HB', data, offset)
        page_info['entries'].append({
            'offset': offset,
            'len': e_len,
            'pg_type': e_type,
            'data': data[offset + 3:offset + 3 + e_len],
        })

    return page_info
78+
79+
# Parses a btree metadata page into a dict of its fields.
# Asserts that the magic value, the page type and the version are the ones this module expects.
def dump_meta_page(page):
    # General metadata: the first 72 bytes of the page.
    # Items 0 and 8 of the unpacked tuple are not kept.
    general = struct.unpack('QIIIIBBBBIIIIII20s', page[:72])
    metadata = dict(zip(
        ('pgno', 'magic', 'version', 'pagesize', 'encrypt_alg', 'pg_type', 'metaflags'),
        general[1:8],
    ))
    metadata.update(zip(
        ('free', 'last_pgno', 'nparts', 'key_count', 'record_count', 'flags'),
        general[9:15],
    ))
    metadata['uid'] = binascii.hexlify(general[15])

    # Sanity checks, done before the btree specific part is parsed
    assert metadata['magic'] == BTREE_MAGIC, 'bdb magic does not match bdb btree magic'
    assert metadata['pg_type'] == BTREE_META, 'Metadata page is not a btree metadata page'
    assert metadata['version'] == DB_VERSION, 'Database too new'

    # Btree metadata: bytes 72 up to 512 of the page.
    # Items 0, 5 and 7 of the unpacked tuple are not kept.
    btree = struct.unpack('IIIII368sI12s16s20s', page[72:512])
    metadata.update(zip(('minkey', 're_len', 're_pad', 'root'), btree[1:5]))
    metadata['crypto_magic'] = btree[6]
    metadata['iv'] = binascii.hexlify(btree[8])
    metadata['chksum'] = binascii.hexlify(btree[9])
    return metadata
117+
118+
# Turns the 'entries' list of a dict produced by dump_leaf_page into a key -> value dict.
# Entries alternate key, value, key, value, ... A trailing key that has no value after it
# is mapped to an empty bytes object.
def extract_kv_pairs(page_data):
    records = [entry['data'] for entry in page_data['entries']]
    total = len(records)
    pairs = {}
    # Walk the keys, which sit at the even positions; the value is the record right after
    for pos in range(0, total, 2):
        value_pos = pos + 1
        pairs[records[pos]] = records[value_pos] if value_pos < total else b''
    return pairs
130+
131+
# Extract the key-value pairs of the BDB file given in filename.
# Returns a single dict that merges the pairs of every leaf page. If the same key shows up
# in more than one page, the value from the later page wins.
def dump_bdb_kv(filename):
    # Read the whole file, one PAGESIZE chunk at a time, until read() returns no more data
    with open(filename, 'rb') as f:
        pages = list(iter(lambda: f.read(PAGESIZE), b''))

    # Sanity check the meta pages. The returned dicts are not needed, only the assertions.
    dump_meta_page(pages[OUTER_META_PAGE])
    dump_meta_page(pages[INNER_META_PAGE])

    # Fetch the kv pairs from the leaf pages. Pages 0, 1 and 2 are not scanned.
    kv = {}
    for page in pages[3:]:
        info = dump_leaf_page(page)
        if info is not None:
            # Merge in place rather than rebuilding the whole dict for every page
            kv.update(extract_kv_pairs(info))
    return kv

0 commit comments

Comments
 (0)