Skip to content

Commit d5aa198

Browse files
droarkDouglas Roark
authored andcommitted
Allow linearization scripts to support hash byte reversal
Currently, the linearization scripts require input hashes to be in one endian form. Add support for byte reversal.
1 parent 7dac1e5 commit d5aa198

File tree

4 files changed

+58
-19
lines changed

4 files changed

+58
-19
lines changed

contrib/linearize/README.md

Lines changed: 25 additions & 15 deletions
Original file line numberDiff line numberDiff line change
@@ -1,33 +1,43 @@
11
# Linearize
2-
Construct a linear, no-fork, best version of the blockchain.
2+
Construct a linear, no-fork, best version of the Bitcoin blockchain.
33

44
## Step 1: Download hash list
55

66
$ ./linearize-hashes.py linearize.cfg > hashlist.txt
77

88
Required configuration file settings for linearize-hashes:
9-
* RPC: rpcuser, rpcpassword
9+
* RPC: `rpcuser`, `rpcpassword`
1010

1111
Optional configuration file settings for linearize-hashes:
12-
* RPC: host, port
13-
* Block chain: min_height, max_height
12+
* RPC: `host`, `port` (Default: `127.0.0.1:8332`)
13+
* Blockchain: `min_height`, `max_height`
14+
* `rev_hash_bytes`: If true, the written block hash list will be
15+
byte-reversed. (In other words, the hash returned by getblockhash will have its
16+
bytes reversed.) False by default. Intended for generation of
17+
standalone hash lists but safe to use with linearize-data.py, which will output
18+
the same data no matter which byte format is chosen.
1419

1520
## Step 2: Copy local block data
1621

1722
$ ./linearize-data.py linearize.cfg
1823

1924
Required configuration file settings:
20-
* "input": bitcoind blocks/ directory containing blkNNNNN.dat
21-
* "hashlist": text file containing list of block hashes, linearized-hashes.py
22-
output.
23-
* "output_file": bootstrap.dat
25+
* `output_file`: The file that will contain the final blockchain.
2426
or
25-
* "output": output directory for linearized blocks/blkNNNNN.dat output
27+
* `output`: Output directory for linearized blocks/blkNNNNN.dat output.
2628

2729
Optional configuration file settings for linearize-data:
28-
* "netmagic": network magic number
29-
* "max_out_sz": maximum output file size (default `1000*1000*1000`)
30-
* "split_timestamp": Split files when a new month is first seen, in addition to
31-
reaching a maximum file size.
32-
* "file_timestamp": Set each file's last-modified time to that of the
33-
most recent block in that file.
30+
* `file_timestamp`: Set each file's last-modified time to that of the most
31+
recent block in that file.
32+
* `genesis`: The hash of the genesis block in the blockchain.
33+
* `input`: bitcoind blocks/ directory containing blkNNNNN.dat
34+
* `hashlist`: text file containing list of block hashes created by
35+
linearize-hashes.py.
36+
* `max_out_sz`: Maximum size for files created by the `output_file` option.
37+
(Default: `1000*1000*1000 bytes`)
38+
* `netmagic`: Network magic number.
39+
* `rev_hash_bytes`: If true, the block hash list written by linearize-hashes.py
40+
will be byte-reversed when read by linearize-data.py. See the linearize-hashes
41+
entry for more information.
42+
* `split_timestamp`: Split blockchain files when a new month is first seen, in
43+
addition to reaching a maximum file size (`max_out_sz`).

contrib/linearize/example-linearize.cfg

Lines changed: 3 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -23,7 +23,9 @@ input=/home/example/.bitcoin/blocks
2323

2424
output_file=/home/example/Downloads/bootstrap.dat
2525
hashlist=hashlist.txt
26-
split_year=1
2726

2827
# Maximum size in bytes of out-of-order blocks cache in memory
2928
out_of_order_cache_sz = 100000000
29+
30+
# Do we want to reverse the hash bytes coming from getblockhash?
31+
rev_hash_bytes = False

contrib/linearize/linearize-data.py

Lines changed: 17 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -23,6 +23,12 @@
2323

2424
settings = {}
2525

26+
##### Switch endian-ness #####
27+
def hex_switchEndian(s):
    """Return hex string `s` with the order of its bytes reversed.

    The string is read as consecutive two-character pairs (one pair per
    byte). The pairs are emitted in reverse order; the two characters
    inside each pair keep their original order.

    Raises ValueError if `s` has an odd number of characters, because it
    cannot then be split into whole pairs.
    """
    if len(s) % 2:
        raise ValueError("hex string must have an even length: %r" % (s,))
    pairs = [s[i:i + 2] for i in range(0, len(s), 2)]
    return ''.join(reversed(pairs))
31+
2632
def uint32(x):
    """Return `x` masked to its low 32 bits, as a non-negative integer."""
    # A plain hex literal yields the same mask as the Python 2-only
    # long-suffixed form and is also valid syntax on Python 3.
    return x & 0xffffffff
2834

@@ -69,17 +75,21 @@ def get_blk_dt(blk_hdr):
6975
dt_ym = datetime.datetime(dt.year, dt.month, 1)
7076
return (dt_ym, nTime)
7177

78+
# When getting the list of block hashes, undo any byte reversals.
7279
def get_block_hashes(settings):
    """Read the block hash list named by settings['hashlist'].

    Each line of the file has trailing whitespace stripped and is appended
    to the result in file order. When settings['rev_hash_bytes'] equals the
    string 'true', each line is first passed through hex_switchEndian().
    A missing 'rev_hash_bytes' key is treated as 'false'.

    Prints the number of hashes read and returns them as a list of strings.
    """
    # The setting does not change while reading, so evaluate it once.
    reverse_bytes = settings.get('rev_hash_bytes', 'false') == 'true'

    blkindex = []
    # The context manager closes the hash list even if reading fails.
    with open(settings['hashlist'], "r") as f:
        for line in f:
            line = line.rstrip()
            if reverse_bytes:
                line = hex_switchEndian(line)
            blkindex.append(line)

    print("Read " + str(len(blkindex)) + " hashes")

    return blkindex
8291

92+
# The block map shouldn't give or receive byte-reversed hashes.
8393
def mkblockmap(blkindex):
8494
blkmap = {}
8595
for height,hash in enumerate(blkindex):
@@ -265,6 +275,12 @@ def run(self):
265275
settings[m.group(1)] = m.group(2)
266276
f.close()
267277

278+
# Force hash byte format setting to be lowercase to make comparisons easier.
279+
# Also place upfront in case any settings need to know about it.
280+
if 'rev_hash_bytes' not in settings:
281+
settings['rev_hash_bytes'] = 'false'
282+
settings['rev_hash_bytes'] = settings['rev_hash_bytes'].lower()
283+
268284
if 'netmagic' not in settings:
269285
settings['netmagic'] = 'f9beb4d9'
270286
if 'genesis' not in settings:
@@ -295,9 +311,8 @@ def run(self):
295311
blkindex = get_block_hashes(settings)
296312
blkmap = mkblockmap(blkindex)
297313

314+
# Block hash map won't be byte-reversed. Neither should the genesis hash.
298315
if not settings['genesis'] in blkmap:
299316
print("Genesis block not found in hashlist")
300317
else:
301318
BlockDataCopier(settings, blkindex, blkmap).run()
302-
303-

contrib/linearize/linearize-hashes.py

Lines changed: 13 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -17,6 +17,12 @@
1717

1818
settings = {}
1919

20+
##### Switch endian-ness #####
21+
def hex_switchEndian(s):
    """Return hex string `s` with the order of its bytes reversed.

    The string is read as consecutive two-character pairs (one pair per
    byte). The pairs are emitted in reverse order; the two characters
    inside each pair keep their original order.

    Raises ValueError if `s` has an odd number of characters, because it
    cannot then be split into whole pairs.
    """
    if len(s) % 2:
        raise ValueError("hex string must have an even length: %r" % (s,))
    pairs = [s[i:i + 2] for i in range(0, len(s), 2)]
    return ''.join(reversed(pairs))
25+
2026
class BitcoinRPC:
2127
def __init__(self, host, port, username, password):
2228
authpair = "%s:%s" % (username, password)
@@ -70,6 +76,8 @@ def get_block_hashes(settings, max_blocks_per_call=10000):
7076
print('JSON-RPC: error at height', height+x, ': ', resp_obj['error'], file=sys.stderr)
7177
exit(1)
7278
assert(resp_obj['id'] == x) # assume replies are in-sequence
79+
if settings['rev_hash_bytes'] == 'true':
80+
resp_obj['result'] = hex_switchEndian(resp_obj['result'])
7381
print(resp_obj['result'])
7482

7583
height += num_blocks
@@ -101,6 +109,8 @@ def get_block_hashes(settings, max_blocks_per_call=10000):
101109
settings['min_height'] = 0
102110
if 'max_height' not in settings:
103111
settings['max_height'] = 313000
112+
if 'rev_hash_bytes' not in settings:
113+
settings['rev_hash_bytes'] = 'false'
104114
if 'rpcuser' not in settings or 'rpcpassword' not in settings:
    # Report on stderr: stdout carries the printed hash list, which the
    # README's usage example redirects into hashlist.txt.
    print("Missing username and/or password in cfg file", file=sys.stderr)
    sys.exit(1)
@@ -109,5 +119,7 @@ def get_block_hashes(settings, max_blocks_per_call=10000):
109119
settings['min_height'] = int(settings['min_height'])
110120
settings['max_height'] = int(settings['max_height'])
111121

112-
get_block_hashes(settings)
122+
# Force hash byte format setting to be lowercase to make comparisons easier.
123+
settings['rev_hash_bytes'] = settings['rev_hash_bytes'].lower()
113124

125+
get_block_hashes(settings)

0 commit comments

Comments
 (0)