|
| 1 | +#!/usr/bin/env python3 |
| 2 | +# Copyright (c) 2024-present The Bitcoin Core developers |
| 3 | +# Distributed under the MIT software license, see the accompanying |
| 4 | +# file COPYING or http://www.opensource.org/licenses/mit-license.php. |
| 5 | +"""Tool to convert a compact-serialized UTXO set to a SQLite3 database. |
| 6 | +
|
| 7 | +The input UTXO set can be generated by Bitcoin Core with the `dumptxoutset` RPC: |
| 8 | +$ bitcoin-cli dumptxoutset ~/utxos.dat |
| 9 | +
|
| 10 | +The created database contains a table `utxos` with the following schema: |
| 11 | +(txid TEXT, vout INT, value INT, coinbase INT, height INT, scriptpubkey TEXT) |
| 12 | +""" |
| 13 | +import argparse |
| 14 | +import os |
| 15 | +import sqlite3 |
| 16 | +import sys |
| 17 | +import time |
| 18 | + |
| 19 | + |
# Signature expected in the first five bytes of the input file.
UTXO_DUMP_MAGIC = b'utxo\xff'
# The only dump format version this tool accepts.
UTXO_DUMP_VERSION = 2
# Maps the 4-byte network magic stored in the dump header to a display name.
NET_MAGIC_BYTES = {
    bytes.fromhex("f9beb4d9"): "Mainnet",
    bytes.fromhex("0a03cf40"): "Signet",
    bytes.fromhex("0b110907"): "Testnet3",
    bytes.fromhex("1c163f28"): "Testnet4",
    bytes.fromhex("fabfb5da"): "Regtest",
}
| 29 | + |
| 30 | + |
def read_varint(f):
    """Read one variable-length integer from the binary stream `f`.

    Equivalent of `ReadVarInt()` (see serialization module). Every byte
    contributes its low 7 bits, most significant group first. A set high bit
    means another byte follows; in that case the accumulator is incremented
    by one before the next 7 bits are shifted in.

    Returns the decoded non-negative integer.
    Raises EOFError if the stream ends before the final byte of the integer.
    """
    n = 0
    while True:
        chunk = f.read(1)
        if not chunk:
            # indexing an empty read would surface as an opaque IndexError
            raise EOFError("unexpected end of file while reading varint")
        dat = chunk[0]
        n = (n << 7) | (dat & 0x7f)
        if dat & 0x80:
            n += 1
        else:
            return n
| 41 | + |
| 42 | + |
def read_compactsize(f):
    """Read one compact-size integer from the binary stream `f`.

    Equivalent of `ReadCompactSize()` (see serialization module). The first
    byte is either the value itself (0-252) or a marker announcing a
    little-endian payload: 253 -> 2 bytes, 254 -> 4 bytes, 255 -> 8 bytes.

    Returns the decoded non-negative integer.
    Raises EOFError if the stream ends before the marker byte or before the
    full payload was read.
    """
    marker = f.read(1)
    if not marker:
        raise EOFError("unexpected end of file while reading compact size")
    n = marker[0]
    width = {253: 2, 254: 4, 255: 8}.get(n)
    if width is None:
        return n
    payload = f.read(width)
    if len(payload) != width:
        # int.from_bytes would silently decode a short read to a wrong value
        raise EOFError("unexpected end of file while reading compact size")
    return int.from_bytes(payload, "little")
| 53 | + |
| 54 | + |
def decompress_amount(x):
    """Expand a compressed amount back to its integer value.

    Equivalent of `DecompressAmount()` (see compressor module). Zero maps to
    zero. Otherwise `x - 1` is split into a decimal exponent (its last base-10
    digit) and a remainder that encodes the mantissa; the result is
    mantissa * 10**exponent.
    """
    if x == 0:
        return 0
    remainder, exponent = divmod(x - 1, 10)
    if exponent < 9:
        # the lowest mantissa digit (1-9) is stored as a base-9 digit (0-8)
        remainder, digit = divmod(remainder, 9)
        mantissa = remainder * 10 + digit + 1
    else:
        mantissa = remainder + 1
    return mantissa * 10**exponent
| 73 | + |
| 74 | + |
def decompress_script(f):
    """Read one compressed script from stream `f` and return the raw script bytes.

    Equivalent of `DecompressScript()` (see compressor module). The leading
    varint is a type code for values 0-5 (fixed-size templates rebuilt below);
    any other value is the raw script length plus 6.
    """
    code = read_varint(f)
    if code == 0:  # P2PKH
        key_hash = f.read(20)
        return bytes([0x76, 0xa9, 20]) + key_hash + bytes([0x88, 0xac])
    if code == 1:  # P2SH
        script_hash = f.read(20)
        return bytes([0xa9, 20]) + script_hash + bytes([0x87])
    if code in (2, 3):  # P2PK (compressed): the code doubles as the pubkey prefix
        x_coord = f.read(32)
        return bytes([33, code]) + x_coord + bytes([0xac])
    if code in (4, 5):  # P2PK (uncompressed): stored compressed, prefix is code - 2
        compressed_pubkey = bytes([code - 2]) + f.read(32)
        full_pubkey = decompress_pubkey(compressed_pubkey)
        return bytes([65]) + full_pubkey + bytes([0xac])
    # others (bare multisig, segwit etc.): stored verbatim
    size = code - 6
    assert size <= 10000, f"too long script with size {size}"
    return f.read(size)
| 91 | + |
| 92 | + |
def decompress_pubkey(compressed_pubkey):
    """Turn a 33-byte compressed public key into its 65-byte uncompressed form.

    The y coordinate is recovered as a square root of x^3 + 7 modulo the field
    prime, and the root whose parity matches the prefix byte (2 = even,
    3 = odd) is selected (see functions `secp256k1_eckey_pubkey_parse` and
    `secp256k1_ge_set_xo_var`).

    Returns b'\\x04' followed by the 32-byte big-endian x and y coordinates.
    Fails an assertion if the input is malformed or x is not on the curve.
    """
    P = 2**256 - 2**32 - 977  # secp256k1 field size
    assert len(compressed_pubkey) == 33 and compressed_pubkey[0] in (2, 3)
    prefix = compressed_pubkey[0]
    x = int.from_bytes(compressed_pubkey[1:], 'big')
    rhs = (x**3 + 7) % P
    # since P % 4 == 3, a square root (if one exists) is rhs^((P + 1) / 4)
    y = pow(rhs, (P + 1)//4, P)
    assert pow(y, 2, P) == rhs, f"pubkey is not on curve ({compressed_pubkey.hex()})"
    wanted_parity = prefix - 2  # 0 for even y, 1 for odd y
    if (y & 1) != wanted_parity:
        y = P - y  # take the other root, which has the opposite parity
    return bytes([4]) + x.to_bytes(32, 'big') + y.to_bytes(32, 'big')
| 108 | + |
| 109 | + |
def main():
    """Command-line entry point: convert a UTXO dump into a SQLite3 database.

    Parses the `infile`, `outfile` and `--verbose` arguments, validates the
    dump header, then reads `num_utxos` coins and inserts them in batches into
    a freshly created `utxos` table.

    Exits with status 1 if the input file is missing, the output file already
    exists, the header magic or version does not match, or data remains in the
    input after the announced number of coins has been read.
    """
    parser = argparse.ArgumentParser(description=__doc__, formatter_class=argparse.RawDescriptionHelpFormatter)
    parser.add_argument('infile', help='filename of compact-serialized UTXO set (input)')
    parser.add_argument('outfile', help='filename of created SQLite3 database (output)')
    parser.add_argument('-v', '--verbose', action='store_true', help='show details about each UTXO')
    args = parser.parse_args()

    if not os.path.exists(args.infile):
        print(f"Error: provided input file '{args.infile}' doesn't exist.")
        sys.exit(1)

    if os.path.exists(args.outfile):
        print(f"Error: provided output file '{args.outfile}' already exists.")
        sys.exit(1)

    # the context manager closes the input file on every exit path
    with open(args.infile, 'rb') as f:
        # read metadata (magic bytes, version, network magic, block hash, UTXO count)
        magic_bytes = f.read(5)
        version = int.from_bytes(f.read(2), 'little')
        network_magic = f.read(4)
        block_hash = f.read(32)
        num_utxos = int.from_bytes(f.read(8), 'little')
        if magic_bytes != UTXO_DUMP_MAGIC:
            print(f"Error: provided input file '{args.infile}' is not an UTXO dump.")
            sys.exit(1)
        if version != UTXO_DUMP_VERSION:
            print(f"Error: provided input file '{args.infile}' has unknown UTXO dump version {version} "
                  f"(only version {UTXO_DUMP_VERSION} supported)")
            sys.exit(1)
        network_string = NET_MAGIC_BYTES.get(network_magic, f"unknown network ({network_magic.hex()})")
        print(f"UTXO Snapshot for {network_string} at block hash "
              f"{block_hash[::-1].hex()[:32]}..., contains {num_utxos} coins")

        start_time = time.time()
        write_batch = []
        coins_per_hash_left = 0
        prevout_hash = None
        max_height = 0

        # Create the database only after the header was accepted, so that a
        # rejected input does not leave an empty database file behind (which
        # would make the next run fail with "already exists").
        con = sqlite3.connect(args.outfile)
        try:
            con.execute("CREATE TABLE utxos(txid TEXT, vout INT, value INT, coinbase INT, height INT, scriptpubkey TEXT)")

            for coin_idx in range(1, num_utxos+1):
                # read key (COutPoint)
                if coins_per_hash_left == 0:  # read next prevout hash
                    prevout_hash = f.read(32)[::-1].hex()
                    coins_per_hash_left = read_compactsize(f)
                prevout_index = read_compactsize(f)
                # read value (Coin): the low bit of `code` is the coinbase
                # flag, the remaining bits are the height
                code = read_varint(f)
                height = code >> 1
                is_coinbase = code & 1
                amount = decompress_amount(read_varint(f))
                scriptpubkey = decompress_script(f).hex()
                write_batch.append((prevout_hash, prevout_index, amount, is_coinbase, height, scriptpubkey))
                if height > max_height:
                    max_height = height
                coins_per_hash_left -= 1

                if args.verbose:
                    print(f"Coin {coin_idx}/{num_utxos}:")
                    print(f"    prevout = {prevout_hash}:{prevout_index}")
                    print(f"    amount = {amount}, height = {height}, coinbase = {is_coinbase}")
                    print(f"    scriptPubKey = {scriptpubkey}\n")

                if coin_idx % (16*1024) == 0 or coin_idx == num_utxos:
                    # write utxo batch to database
                    con.executemany("INSERT INTO utxos VALUES(?, ?, ?, ?, ?, ?)", write_batch)
                    con.commit()
                    write_batch.clear()

                if coin_idx % (1024*1024) == 0:
                    elapsed = time.time() - start_time
                    print(f"{coin_idx} coins converted [{coin_idx/num_utxos*100:.2f}%], " +
                          f"{elapsed:.3f}s passed since start")
        finally:
            # release the database handle even if parsing aborts midway
            con.close()

        print(f"TOTAL: {num_utxos} coins written to {args.outfile}, snapshot height is {max_height}.")
        if f.read(1) != b'':  # EOF should be reached by now
            print(f"WARNING: input file {args.infile} has not reached EOF yet!")
            sys.exit(1)
| 192 | + |
| 193 | + |
# Run the converter only when executed as a script, not when imported.
if __name__ == '__main__':
    main()
0 commit comments