|
| 1 | +#!/usr/bin/env python3 |
| 2 | +# -*- coding: utf-8 -*- |
| 3 | +""" |
| 4 | +blockchain-parser.py |
| 5 | +
|
| 6 | +Author: Denis Leonov |
| 7 | +Project: Blockchain Parser (Blockchain Scalpel) |
| 8 | +Repository: https://github.com/ragestack/blockchain-parser |
| 9 | +Version: 2.0.0 |
| 10 | +
|
| 11 | +License: |
| 12 | + Blockchain Scalpel License (Source-Available, Non-Commercial) |
| 13 | + Free for non-commercial use with attribution. |
| 14 | + Commercial use and any SaaS/cloud/hosted use require a paid license. |
| 15 | +
|
| 16 | +Commercial licensing contact: 466611@gmail.com |
| 17 | +Other author's contact info: https://aaris.ru/DL |
| 18 | +
|
| 19 | +Limited warranty: |
| 20 | + A limited compatibility warranty related to block format changes |
| 21 | + is provided. See LICENSE and WARRANTY.md for details. |
| 22 | +""" |
| 23 | + |
| 24 | +__version__ = "2.0.0" |
| 25 | +__author__ = "Denis Leonov" |
| 26 | + |
| 27 | +# SPDX-License-Identifier: LicenseRef-Blockchain-Scalpel |
| 28 | + |
| 29 | +import os, io, sys |
| 30 | +import datetime |
| 31 | +import hashlib |
| 32 | + |
def reverse(input):
    """Reverse the byte order of a hex string.

    The string is treated as a sequence of two-character byte pairs and the
    order of the pairs is reversed (the characters inside a pair keep their
    order), e.g. ``'D9B4BEF9'`` becomes ``'F9BEB4D9'``.

    Args:
        input: Hex string with an even number of characters. The name
            shadows the builtin but is kept so existing callers keep working.

    Returns:
        The pair-reversed string, or ``None`` when the length is odd.
    """
    if len(input) % 2 != 0:
        return None
    # Slice into pairs and join once instead of prepending to a growing
    # string, which copies the whole result on every iteration.
    pairs = [input[pos:pos + 2] for pos in range(0, len(input), 2)]
    return ''.join(reversed(pairs))
| 45 | + |
def merkle_root(h):
    """Compute a Merkle root from a list of hashes given as bytes.

    Each hash is byte-reversed before hashing. Neighbouring pairs are
    concatenated and double-SHA-256 hashed level by level, duplicating the
    last entry of a level that has an odd number of entries, until a single
    hash is left. That hash is byte-reversed again and returned.

    Args:
        h: Non-empty list of ``bytes`` hashes. The list itself is not
            modified.

    Returns:
        The root hash as ``bytes``.

    Raises:
        IndexError: If ``h`` is empty.
    """
    def double_sha256(payload):
        return hashlib.sha256(hashlib.sha256(payload).digest()).digest()

    level = [item[::-1] for item in h]
    while len(level) > 1:
        if len(level) % 2 == 1:
            # Odd level: pair the last hash with itself.
            level.append(level[-1])
        level = [
            double_sha256(left + right)
            for left, right in zip(level[0::2], level[1::2])
        ]
    return level[0][::-1]
| 54 | + |
def read_bytes(file, n, byte_order='L'):
    """Read up to ``n`` bytes from ``file`` and return them as uppercase hex.

    Args:
        file: Binary file-like object providing ``read``.
        n: Number of bytes to request.
        byte_order: ``'L'`` (the default) reverses the bytes before
            formatting; any other value keeps them in stream order.

    Returns:
        Uppercase hex string of the bytes that were read.
    """
    chunk = file.read(n)
    ordered = chunk[::-1] if byte_order == 'L' else chunk
    return ordered.hex().upper()
| 61 | + |
def read_varint(file):
    """Read a variable-length integer from ``file`` as an uppercase hex string.

    The first byte selects the encoding: a value below 253 is the integer
    itself; 253, 254 and 255 announce 2, 4 and 8 following bytes
    respectively. The following bytes are returned in reversed order, so the
    result can be passed straight to ``int(value, 16)``.

    Args:
        file: Binary file-like object providing ``read``.

    Returns:
        Uppercase hex string of the value, without the marker byte.

    Raises:
        ValueError: If no byte can be read for the marker.
    """
    marker = file.read(1)
    marker_value = int(marker.hex(), 16)
    if marker_value < 253:
        return marker.hex().upper()
    payload_len = {253: 2, 254: 4, 255: 8}[marker_value]
    digits = [file.read(1).hex().upper() for _ in range(payload_len)]
    return ''.join(reversed(digits))
| 78 | + |
def print_help(script_name):
    """Print the command-line usage text to standard output.

    Args:
        script_name: Program name to show in the usage line.
    """
    usage_lines = [
        "Usage:",
        f" python {script_name} <dirA> <dirB>",
        "",
        "Arguments:",
        " dirA Directory where blk*.dat files are stored (must exist)",
        " dirB Output directory for parsing results (must already exist)",
        "",
    ]
    print("\n".join(usage_lines))
| 87 | + |
# Number of bytes expected in the optional xor.dat key file.
_XOR_KEY_LEN = 8


def _double_sha256_hex(payload):
    """Return SHA-256(SHA-256(payload)) byte-reversed, as uppercase hex."""
    digest = hashlib.sha256(hashlib.sha256(payload).digest()).digest()
    return digest[::-1].hex().upper()


def _read_varint_with_raw(stream):
    """Read a variable-length integer, returning (value_hex, raw_hex).

    ``value_hex`` is what ``read_varint`` returns (ready for ``int(..., 16)``);
    ``raw_hex`` is the encoding exactly as it appears in the stream, marker
    byte included, which is needed to rebuild the bytes that are hashed.
    """
    marker = stream.read(1)
    marker_value = int(marker.hex(), 16)
    marker_hex = marker.hex().upper()
    if marker_value < 253:
        return marker_hex, marker_hex
    payload = stream.read({253: 2, 254: 4, 255: 8}[marker_value])
    return payload[::-1].hex().upper(), marker_hex + payload.hex().upper()


def _load_xor_key(blocks_dir):
    """Return the XOR key from <blocks_dir>/xor.dat, or all zeroes if absent."""
    key_path = os.path.join(blocks_dir, 'xor.dat')
    if not os.path.isfile(key_path):
        return bytes(_XOR_KEY_LEN)
    with open(key_path, 'rb') as key_file:
        key = key_file.read(_XOR_KEY_LEN)
    # A short non-zero key would otherwise fail later with an opaque
    # IndexError while de-obfuscating the first block file.
    if any(key) and len(key) != _XOR_KEY_LEN:
        raise ValueError(
            f"Invalid XOR key: expected {_XOR_KEY_LEN} bytes in {key_path}, got {len(key)}"
        )
    return key


def _parse_transaction(stream, lines):
    """Parse one transaction, append its report lines and return its hash.

    The hash is computed over the fields collected in ``raw``; the 0x00
    marker, the flag byte and the witness data are not part of it.
    """
    version = read_bytes(stream, 4)
    lines.append('TX version number = ' + version)
    raw = [reverse(version)]

    # A zero byte where the input count would start marks a transaction
    # that carries witness data; the byte after it (the flag) is skipped.
    marker = stream.read(1)
    has_witness = int(marker.hex(), 16) == 0
    if has_witness:
        stream.seek(1, 1)
    else:
        # The byte just read is the first byte of the input count.
        stream.seek(-1, 1)

    count_hex, count_raw = _read_varint_with_raw(stream)
    in_count = int(count_hex, 16)
    lines.append('Inputs count = ' + count_hex)
    raw.append(count_raw)

    for _ in range(in_count):
        prev_hash = read_bytes(stream, 32)
        lines.append('TX from hash = ' + prev_hash)
        raw.append(reverse(prev_hash))
        prev_index = read_bytes(stream, 4)
        lines.append('N output = ' + prev_index)
        raw.append(reverse(prev_index))
        length_hex, length_raw = _read_varint_with_raw(stream)
        raw.append(length_raw)
        script = read_bytes(stream, int(length_hex, 16), 'B')
        lines.append('Input script = ' + script)
        raw.append(script)
        sequence = read_bytes(stream, 4, 'B')
        lines.append('Sequence number = ' + sequence)
        raw.append(sequence)

    count_hex, count_raw = _read_varint_with_raw(stream)
    out_count = int(count_hex, 16)
    lines.append('Outputs count = ' + str(out_count))
    raw.append(count_raw)

    for _ in range(out_count):
        value = read_bytes(stream, 8)
        raw.append(reverse(value))
        length_hex, length_raw = _read_varint_with_raw(stream)
        raw.append(length_raw)
        script = read_bytes(stream, int(length_hex, 16), 'B')
        lines.append('Value = ' + value)
        lines.append('Output script = ' + script)
        raw.append(script)

    if has_witness:
        for in_idx in range(in_count):
            item_count = int(read_varint(stream), 16)
            for item_idx in range(item_count):
                item_len = int(read_varint(stream), 16)
                # read_bytes defaults to 'L', so witness items are reported
                # byte-reversed (unlike scripts); kept so the output format
                # does not change.
                item = read_bytes(stream, item_len)
                lines.append(
                    'Witness ' + str(in_idx) + ' ' + str(item_idx) + ' '
                    + str(item_len) + ' ' + item
                )

    lock_time = read_bytes(stream, 4)
    lines.append('Lock time = ' + lock_time)
    raw.append(reverse(lock_time))

    tx_hash = _double_sha256_hex(bytes.fromhex(''.join(raw)))
    lines.append('TX hash = ' + tx_hash)
    lines.append('')
    return tx_hash


def _parse_block(stream, lines, source_name):
    """Parse one block at the current stream position into report lines."""
    # Some blk files contain zero padding between blocks: at most two 4-byte
    # reads are attempted before giving up.
    attempts = 0
    magic = ''
    while magic != 'D9B4BEF9':
        magic = read_bytes(stream, 4)
        attempts += 1
        if attempts > 2:
            raise ValueError(f"Invalid data: magic number missing — possible truncated {source_name} file")
    lines.append('Magic number = ' + magic)
    lines.append('Block size = ' + read_bytes(stream, 4))

    # Hash the 80-byte header first, then rewind and decode its fields.
    header_start = stream.tell()
    header = stream.read(80)
    lines.append('SHA256 hash of the current block hash = ' + _double_sha256_hex(header))
    stream.seek(header_start, 0)

    lines.append('Version number = ' + read_bytes(stream, 4))
    lines.append('SHA256 hash of the previous block hash = ' + read_bytes(stream, 32))
    expected_root = read_bytes(stream, 32)
    lines.append('MerkleRoot hash = ' + expected_root)
    lines.append('Time stamp = ' + read_bytes(stream, 4))
    lines.append('Difficulty = ' + read_bytes(stream, 4))
    lines.append('Random number = ' + read_bytes(stream, 4))

    tx_count = int(read_varint(stream), 16)
    lines.append('Transactions count = ' + str(tx_count))
    lines.append('')

    tx_hashes = []
    for _ in range(tx_count):
        tx_hashes.append(bytes.fromhex(_parse_transaction(stream, lines)))

    computed_root = merkle_root(tx_hashes).hex().upper()
    if computed_root != expected_root:
        print('Merkle roots does not match! >', expected_root, computed_root)


def main():
    """Parse every unprocessed blk*.dat file in dirA into a text report in dirB.

    Command line: ``<script> <dirA> <dirB>`` (or ``-h`` / ``--help``).
    Both directories must already exist and dirB must be writable; otherwise
    an error is printed to stderr and the process exits with status 1.

    A blk file is skipped when dirB already holds a ``.txt`` file with the
    same base name. If ``dirA/xor.dat`` exists and its key is non-zero, each
    file is XOR-decoded with that key before parsing.

    Raises:
        ValueError: If the block magic number is not found where expected,
            or if xor.dat holds a non-zero key shorter than 8 bytes.
    """
    script_name = os.path.basename(sys.argv[0])

    if len(sys.argv) == 2 and sys.argv[1] in ("-h", "--help"):
        print_help(script_name)
        sys.exit(0)

    if len(sys.argv) != 3:
        print("Error: exactly 2 arguments are required.\n", file=sys.stderr)
        print_help(script_name)
        sys.exit(1)

    dirA = os.path.abspath(os.path.expanduser(sys.argv[1]))
    dirB = os.path.abspath(os.path.expanduser(sys.argv[2]))

    if not os.path.isdir(dirA):
        print(f"Error: input directory does not exist or is not a directory:\n {dirA}", file=sys.stderr)
        sys.exit(1)

    if not os.path.exists(dirB):
        print(f"Error: output directory does not exist (create it first):\n {dirB}", file=sys.stderr)
        sys.exit(1)

    if not os.path.isdir(dirB):
        print(f"Error: output path exists but is not a directory:\n {dirB}", file=sys.stderr)
        sys.exit(1)

    if not os.access(dirB, os.W_OK):
        print(f"Error: output directory is not writable:\n {dirB}", file=sys.stderr)
        sys.exit(1)

    key = _load_xor_key(dirA)

    done = {
        os.path.splitext(x)[0]
        for x in os.listdir(dirB)
        if x.endswith('.txt') and x.startswith('blk')
    }
    pending = sorted(
        x for x in os.listdir(dirA)
        if x.endswith('.dat') and x.startswith('blk')
        and os.path.splitext(x)[0] not in done
    )

    for nameSrc in pending:
        nameRes = nameSrc.replace('.dat', '.txt')
        src_path = os.path.join(dirA, nameSrc)
        start_msg = 'Start ' + src_path + ' in ' + str(datetime.datetime.now())
        resList = [start_msg]
        print(start_msg)

        with open(src_path, 'rb') as src_file:
            data = bytearray(src_file.read())
        if any(key):
            key_len = len(key)
            for pos in range(len(data)):
                data[pos] ^= key[pos % key_len]

        # Compare against the number of bytes actually read: the file size
        # on disk may have changed since the read above.
        data_size = len(data)
        with io.BytesIO(data) as stream:
            while stream.tell() != data_size:
                _parse_block(stream, resList, nameSrc)

        # The context manager closes the report even if a write fails.
        with open(os.path.join(dirB, nameRes), 'w') as out_file:
            out_file.writelines(line + '\n' for line in resList)

    print('All done ' + str(datetime.datetime.now()))
| 337 | + |
# Run the parser only when this file is executed as a script, not on import.
if __name__ == "__main__":
    main()
0 commit comments