Skip to content

Commit ec99ed7

Browse files
committed
contrib: add tool to convert compact-serialized UTXO set to SQLite database
1 parent 9355578 commit ec99ed7

File tree

2 files changed

+203
-0
lines changed

2 files changed

+203
-0
lines changed

contrib/README.md

Lines changed: 8 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -43,3 +43,11 @@ Command Line Tools
4343

4444
### [Completions](/contrib/completions) ###
4545
Shell completions for bash and fish.
46+
47+
UTXO Set Tools
48+
--------------
49+
50+
### [UTXO-to-SQLite](/contrib/utxo-tools/utxo_to_sqlite.py) ###
51+
This script converts a compact-serialized UTXO set (as generated by Bitcoin Core with `dumptxoutset`)
52+
to a SQLite3 database. For more details, such as the created table name and schema, refer to the
53+
module docstring at the top of the script, which is also shown in the command's `--help` output.

contrib/utxo-tools/utxo_to_sqlite.py

Lines changed: 195 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,195 @@
1+
#!/usr/bin/env python3
2+
# Copyright (c) 2024-present The Bitcoin Core developers
3+
# Distributed under the MIT software license, see the accompanying
4+
# file COPYING or http://www.opensource.org/licenses/mit-license.php.
5+
"""Tool to convert a compact-serialized UTXO set to a SQLite3 database.
6+
7+
The input UTXO set can be generated by Bitcoin Core with the `dumptxoutset` RPC:
8+
$ bitcoin-cli dumptxoutset ~/utxos.dat
9+
10+
The created database contains a table `utxos` with the following schema:
11+
(txid TEXT, vout INT, value INT, coinbase INT, height INT, scriptpubkey TEXT)
12+
"""
13+
import argparse
14+
import os
15+
import sqlite3
16+
import sys
17+
import time
18+
19+
20+
# Magic prefix and format version expected at the start of a UTXO dump file.
UTXO_DUMP_MAGIC = b"utxo" + bytes([0xff])
UTXO_DUMP_VERSION = 2

# Display name for each known 4-byte network magic value.
NET_MAGIC_BYTES = {
    bytes.fromhex("f9beb4d9"): "Mainnet",
    bytes.fromhex("0a03cf40"): "Signet",
    bytes.fromhex("0b110907"): "Testnet3",
    bytes.fromhex("1c163f28"): "Testnet4",
    bytes.fromhex("fabfb5da"): "Regtest",
}
29+
30+
31+
def read_varint(f):
    """Equivalent of `ReadVarInt()` (see serialization module).

    Reads a variable-length integer from the binary stream `f`, one byte at a
    time. Each byte contributes its low 7 bits; a set high bit means another
    byte follows, in which case the accumulated value is incremented by one
    before the next byte is shifted in.

    Returns the decoded non-negative integer and leaves the stream positioned
    right after the last byte of the encoding.

    Raises EOFError if the stream ends before the final byte (high bit clear)
    has been read.
    """
    n = 0
    while True:
        chunk = f.read(1)
        if not chunk:
            # An empty read means EOF; fail with a clear error instead of
            # an IndexError from indexing into an empty bytes object.
            raise EOFError("unexpected end of file while reading VarInt")
        dat = chunk[0]
        n = (n << 7) | (dat & 0x7f)
        if dat & 0x80:
            n += 1
        else:
            return n
41+
42+
43+
def read_compactsize(f):
    """Equivalent of `ReadCompactSize()` (see serialization module).

    Reads one byte from the binary stream `f`. Values below 253 are returned
    as-is; the markers 253, 254 and 255 announce a following little-endian
    integer of 2, 4 or 8 bytes respectively, which is returned instead.

    Raises EOFError if the stream ends before the marker byte or the full
    payload could be read (a short payload would otherwise be decoded
    silently as a wrong, smaller number).
    """
    first = f.read(1)
    if not first:
        raise EOFError("unexpected end of file while reading CompactSize")
    n = first[0]
    if n == 253:
        width = 2
    elif n == 254:
        width = 4
    elif n == 255:
        width = 8
    else:
        return n
    payload = f.read(width)
    if len(payload) != width:
        raise EOFError("unexpected end of file while reading CompactSize")
    return int.from_bytes(payload, "little")
53+
54+
55+
def decompress_amount(x):
    """Equivalent of `DecompressAmount()` (see compressor module).

    Expands a compressed amount back into the integer it encodes. Zero maps
    to zero; otherwise the value (minus one) packs a decimal exponent in its
    lowest base-10 digit and, above it, the mantissa.
    """
    if x == 0:
        return 0
    # lowest base-10 digit is the exponent, the remainder holds the mantissa
    remainder, exponent = divmod(x - 1, 10)
    if exponent < 9:
        # mantissa is split into a last digit (1..9) and the leading part
        leading, digit_minus_one = divmod(remainder, 9)
        mantissa = leading * 10 + digit_minus_one + 1
    else:
        mantissa = remainder + 1
    return mantissa * 10 ** exponent
73+
74+
75+
def decompress_script(f):
    """Equivalent of `DecompressScript()` (see compressor module).

    Reads one compressed script from the binary stream `f` and returns the
    expanded script as bytes. The leading VarInt selects the encoding:
      0      P2PKH, followed by a 20-byte hash
      1      P2SH, followed by a 20-byte hash
      2, 3   P2PK with compressed pubkey, followed by the 32-byte x coordinate
      4, 5   P2PK with uncompressed pubkey, stored as 32-byte x coordinate
      >= 6   raw script of length (value - 6)

    Raises ValueError if a raw script is longer than 10000 bytes, and
    EOFError if the stream ends before the full payload was read.
    """
    size = read_varint(f)  # sizes 0-5 encode compressed script types
    if size in (0, 1):
        payload_len = 20
    elif size in (2, 3, 4, 5):
        payload_len = 32
    else:
        payload_len = size - 6
        # explicit check instead of `assert`, which is stripped under `python -O`
        if payload_len > 10000:
            raise ValueError(f"too long script with size {payload_len}")

    payload = f.read(payload_len)
    if len(payload) != payload_len:
        # a short read would otherwise produce a silently truncated script
        raise EOFError("unexpected end of file while reading compressed script")

    if size == 0:  # P2PKH
        return bytes([0x76, 0xa9, 20]) + payload + bytes([0x88, 0xac])
    if size == 1:  # P2SH
        return bytes([0xa9, 20]) + payload + bytes([0x87])
    if size in (2, 3):  # P2PK (compressed)
        return bytes([33, size]) + payload + bytes([0xac])
    if size in (4, 5):  # P2PK (uncompressed)
        # the stored tag (4/5) maps to the compressed-pubkey parity tag (2/3)
        compressed_pubkey = bytes([size - 2]) + payload
        return bytes([65]) + decompress_pubkey(compressed_pubkey) + bytes([0xac])
    return payload  # others (bare multisig, segwit etc.)
91+
92+
93+
def decompress_pubkey(compressed_pubkey):
    """Decompress pubkey by calculating y = sqrt(x^3 + 7) % p
    (see functions `secp256k1_eckey_pubkey_parse` and `secp256k1_ge_set_xo_var`).

    `compressed_pubkey` must be 33 bytes: a parity tag (2 = even y, 3 = odd y)
    followed by the 32-byte big-endian x coordinate. Returns the 65-byte
    uncompressed encoding (tag 4, x, y).

    Raises ValueError if the encoding is malformed, x is not below the field
    size, or no y exists for x (point not on the curve).
    """
    P = 2**256 - 2**32 - 977  # secp256k1 field size
    # Explicit checks instead of `assert`: assertions are stripped under
    # `python -O`, which would let invalid keys through as garbage output.
    if len(compressed_pubkey) != 33 or compressed_pubkey[0] not in (2, 3):
        raise ValueError(f"invalid compressed pubkey encoding ({compressed_pubkey.hex()})")
    x = int.from_bytes(compressed_pubkey[1:], 'big')
    if x >= P:
        raise ValueError(f"pubkey x coordinate is not a valid field element ({compressed_pubkey.hex()})")
    rhs = (x**3 + 7) % P
    y = pow(rhs, (P + 1)//4, P)  # get sqrt using Tonelli-Shanks algorithm (for p % 4 = 3)
    if pow(y, 2, P) != rhs:
        raise ValueError(f"pubkey is not on curve ({compressed_pubkey.hex()})")
    tag_is_odd = compressed_pubkey[0] == 3
    y_is_odd = (y & 1) == 1
    if tag_is_odd != y_is_odd:  # fix parity (even/odd) if necessary
        y = P - y
    return bytes([4]) + x.to_bytes(32, 'big') + y.to_bytes(32, 'big')
108+
109+
110+
def main():
    """Command-line entry point: convert a UTXO dump file to a SQLite3 database.

    Parses the `infile`/`outfile` arguments, validates the dump header (magic
    bytes and version), then reads all coins and inserts them in batches into
    a newly created `utxos` table. Exits with status 1 if the input file is
    missing, the output file already exists, the header is not recognized, or
    data remains in the input after the announced number of coins was read.

    The database file is only created once the header has been accepted, so a
    rejected input does not leave behind an output file that would make the
    next run fail with "already exists". The input file and the database
    connection are closed on every exit path.
    """
    parser = argparse.ArgumentParser(description=__doc__, formatter_class=argparse.RawDescriptionHelpFormatter)
    parser.add_argument('infile', help='filename of compact-serialized UTXO set (input)')
    parser.add_argument('outfile', help='filename of created SQLite3 database (output)')
    parser.add_argument('-v', '--verbose', action='store_true', help='show details about each UTXO')
    args = parser.parse_args()

    if not os.path.exists(args.infile):
        print(f"Error: provided input file '{args.infile}' doesn't exist.")
        sys.exit(1)

    if os.path.exists(args.outfile):
        print(f"Error: provided output file '{args.outfile}' already exists.")
        sys.exit(1)

    with open(args.infile, 'rb') as f:
        # read metadata (magic bytes, version, network magic, block hash, UTXO count)
        magic_bytes = f.read(5)
        version = int.from_bytes(f.read(2), 'little')
        network_magic = f.read(4)
        block_hash = f.read(32)
        num_utxos = int.from_bytes(f.read(8), 'little')
        if magic_bytes != UTXO_DUMP_MAGIC:
            print(f"Error: provided input file '{args.infile}' is not an UTXO dump.")
            sys.exit(1)
        if version != UTXO_DUMP_VERSION:
            print(f"Error: provided input file '{args.infile}' has unknown UTXO dump version {version} "
                  f"(only version {UTXO_DUMP_VERSION} supported)")
            sys.exit(1)
        network_string = NET_MAGIC_BYTES.get(network_magic, f"unknown network ({network_magic.hex()})")
        print(f"UTXO Snapshot for {network_string} at block hash "
              f"{block_hash[::-1].hex()[:32]}..., contains {num_utxos} coins")

        # create database table (only now that the header has been accepted)
        con = sqlite3.connect(args.outfile)
        try:
            con.execute("CREATE TABLE utxos(txid TEXT, vout INT, value INT, coinbase INT, height INT, scriptpubkey TEXT)")

            start_time = time.time()
            write_batch = []
            coins_per_hash_left = 0
            prevout_hash = None
            max_height = 0

            for coin_idx in range(1, num_utxos+1):
                # read key (COutPoint)
                if coins_per_hash_left == 0:  # read next prevout hash
                    prevout_hash = f.read(32)[::-1].hex()
                    coins_per_hash_left = read_compactsize(f)
                prevout_index = read_compactsize(f)
                # read value (Coin)
                code = read_varint(f)
                height = code >> 1
                is_coinbase = code & 1
                amount = decompress_amount(read_varint(f))
                scriptpubkey = decompress_script(f).hex()
                write_batch.append((prevout_hash, prevout_index, amount, is_coinbase, height, scriptpubkey))
                if height > max_height:
                    max_height = height
                coins_per_hash_left -= 1

                if args.verbose:
                    print(f"Coin {coin_idx}/{num_utxos}:")
                    print(f" prevout = {prevout_hash}:{prevout_index}")
                    print(f" amount = {amount}, height = {height}, coinbase = {is_coinbase}")
                    print(f" scriptPubKey = {scriptpubkey}\n")

                if coin_idx % (16*1024) == 0 or coin_idx == num_utxos:
                    # write utxo batch to database
                    con.executemany("INSERT INTO utxos VALUES(?, ?, ?, ?, ?, ?)", write_batch)
                    con.commit()
                    write_batch.clear()

                if coin_idx % (1024*1024) == 0:
                    elapsed = time.time() - start_time
                    print(f"{coin_idx} coins converted [{coin_idx/num_utxos*100:.2f}%], " +
                          f"{elapsed:.3f}s passed since start")
        finally:
            con.close()

        print(f"TOTAL: {num_utxos} coins written to {args.outfile}, snapshot height is {max_height}.")
        if f.read(1) != b'':  # EOF should be reached by now
            print(f"WARNING: input file {args.infile} has not reached EOF yet!")
            sys.exit(1)
192+
193+
194+
# Run the conversion only when executed as a script, not when imported.
if __name__ == "__main__":
    main()

0 commit comments

Comments
 (0)