@@ -76,6 +76,16 @@ def getFirstBlockFileId(block_dir_path):
     blkId = int(firstBlkFn[3:8])
     return blkId

+def read_xor_key(blocks_path):
+    NUM_XOR_BYTES = 8 # From InitBlocksdirXorKey::xor_key.size()
+    try:
+        xor_filename = os.path.join(blocks_path, "xor.dat")
+        with open(xor_filename, "rb") as xor_file:
+            return xor_file.read(NUM_XOR_BYTES)
+    # support also blockdirs created with pre-v28 versions, where no xor key exists yet
+    except FileNotFoundError:
+        return bytes([0] * NUM_XOR_BYTES)
+
 # Block header and extent on disk
 BlockExtent = namedtuple('BlockExtent', ['fn', 'offset', 'inhdr', 'blkhdr', 'size'])

@@ -95,6 +105,7 @@ def __init__(self, settings, blkindex, blkmap):
         self.outFname = None
         self.blkCountIn = 0
         self.blkCountOut = 0
+        self.xor_key = read_xor_key(self.settings['input'])

         self.lastDate = datetime.datetime(2000, 1, 1)
         self.highTS = 1408893517 - 315360000
@@ -113,6 +124,13 @@ def __init__(self, settings, blkindex, blkmap):
         self.outOfOrderData = {}
         self.outOfOrderSize = 0 # running total size for items in outOfOrderData

+    def read_xored(self, f, size):
+        offset = f.tell()
+        data = bytearray(f.read(size))
+        for i in range(len(data)):
+            data[i] ^= self.xor_key[(i + offset) % len(self.xor_key)]
+        return bytes(data)
+
     def writeBlock(self, inhdr, blk_hdr, rawblock):
         blockSizeOnDisk = len(inhdr) + len(blk_hdr) + len(rawblock)
         if not self.fileOutput and ((self.outsz + blockSizeOnDisk) > self.maxOutSz):
@@ -165,7 +183,7 @@ def fetchBlock(self, extent):
         '''Fetch block contents from disk given extents'''
         with open(self.inFileName(extent.fn), "rb") as f:
             f.seek(extent.offset)
-            return f.read(extent.size)
+            return self.read_xored(f, extent.size)

     def copyOneBlock(self):
         '''Find the next block to be written in the input, and copy it to the output.'''
@@ -190,7 +208,7 @@ def run(self):
                     print("Premature end of block data")
                     return

-            inhdr = self.inF.read(8)
+            inhdr = self.read_xored(self.inF, 8)
             if (not inhdr or (inhdr[0] == "\0")):
                 self.inF.close()
                 self.inF = None
@@ -207,7 +225,7 @@ def run(self):
             inLenLE = inhdr[4:]
             su = struct.unpack("<I", inLenLE)
             inLen = su[0] - 80 # length without header
-            blk_hdr = self.inF.read(80)
+            blk_hdr = self.read_xored(self.inF, 80)
             inExtent = BlockExtent(self.inFn, self.inF.tell(), inhdr, blk_hdr, inLen)

             self.hash_str = calc_hash_str(blk_hdr)
@@ -224,7 +242,7 @@ def run(self):

             if self.blkCountOut == blkHeight:
                 # If in-order block, just copy
-                rawblock = self.inF.read(inLen)
+                rawblock = self.read_xored(self.inF, inLen)
                 self.writeBlock(inhdr, blk_hdr, rawblock)

                 # See if we can catch up to prior out-of-order blocks
@@ -237,7 +255,7 @@ def run(self):
                     # If there is space in the cache, read the data
                     # Reading the data in file sequence instead of seeking and fetching it later is preferred,
                     # but we don't want to fill up memory
-                    self.outOfOrderData[blkHeight] = self.inF.read(inLen)
+                    self.outOfOrderData[blkHeight] = self.read_xored(self.inF, inLen)
                     self.outOfOrderSize += inLen
                 else: # If no space in cache, seek forward
                     self.inF.seek(inLen, os.SEEK_CUR)
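
For context on why every read now goes through read_xored(): Bitcoin Core obfuscates the files under blocks/ with a rolling 8-byte XOR key stored in blocks/xor.dat (blocksdirs created with pre-v28 versions have no xor.dat, which the patch treats as an all-zero key). Each byte is XORed with the key byte selected by the absolute file offset modulo the key length, so a reader must apply the same transformation starting from its current offset. Below is a minimal standalone sketch of that scheme, separate from the patch itself; the helper names, the default datadir path, and the blk00000.dat example are illustrative assumptions.

import os

NUM_XOR_BYTES = 8  # size of the key stored in blocks/xor.dat

def load_xor_key(blocks_path):
    # Fall back to an all-zero key for blocksdirs created before v28 (no xor.dat present).
    try:
        with open(os.path.join(blocks_path, "xor.dat"), "rb") as f:
            return f.read(NUM_XOR_BYTES)
    except FileNotFoundError:
        return bytes(NUM_XOR_BYTES)

def read_deobfuscated(path, offset, size, xor_key):
    # XOR each byte with the key byte chosen by its absolute file offset,
    # mirroring read_xored() in the patch above.
    with open(path, "rb") as f:
        f.seek(offset)
        data = bytearray(f.read(size))
    for i in range(len(data)):
        data[i] ^= xor_key[(offset + i) % len(xor_key)]
    return bytes(data)

if __name__ == "__main__":
    blocks_dir = os.path.expanduser("~/.bitcoin/blocks")  # assumed default datadir location
    key = load_xor_key(blocks_dir)
    # On mainnet, the first 4 deobfuscated bytes of blk00000.dat are the network magic f9beb4d9.
    magic = read_deobfuscated(os.path.join(blocks_dir, "blk00000.dat"), 0, 4, key)
    print(magic.hex())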