Limit CaptureRegion sizes in format_inspector for VMDK and VHDX

quatre · konan-abhi · commit 06a18202ab52 · 2023-02-07T16:49:15.000Z
VMDK: When parsing a VMDK file to calculate its size, the format_inspector determines the location of the Descriptor section by reading two uint64 from the headers of the file and uses them to create the descriptor CaptureRegion. It would be possible to craft a VMDK file that commands the format_inspector to create a very big CaptureRegion, thus exhausting resources on the glance-api process. This patch binds the beginning of the descriptor to 0x200 and limits the size of the CaptureRegion to 1MB, similar to how the VMDK descriptor is parsed by qemu. VHDX: It is a bit more involved, but similar: when looking for the VIRTUAL_DISK_SIZE metadata, the format_inspector was creating an unbounded CaptureRegion. In the same way as it seems to be done in Qemu, we now limit the upper bound of this CaptureRegion. Closes-Bug: #2006490 Change-Id: I3ec5a33df20e1cfb6673f4ff1c7c91aacd065532 (cherry picked from commit d4d33ee)
diff --git a/glance/common/format_inspector.py b/glance/common/format_inspector.py
@@ -345,6 +345,7 @@ class VHDXInspector(FileInspector):
     """
     METAREGION = '8B7CA206-4790-4B9A-B8FE-575F050F886E'
     VIRTUAL_DISK_SIZE = '2FA54224-CD1B-4876-B211-5DBED83BF4B8'
+    VHDX_METADATA_TABLE_MAX_SIZE = 32 * 2048  # From qemu
 
     def __init__(self, *a, **k):
         super(VHDXInspector, self).__init__(*a, **k)
@@ -459,6 +460,8 @@ def _find_meta_entry(self, desired_guid):
                 item_offset, item_length, _reserved = struct.unpack(
                     '<III',
                     meta_buffer[entry_offset + 16:entry_offset + 28])
+                item_length = min(item_length,
+                                  self.VHDX_METADATA_TABLE_MAX_SIZE)
                 self.region('metadata').length = len(meta_buffer)
                 self._log.debug('Found entry at offset %x', item_offset)
                 # Metadata item offset is from the beginning of the metadata
@@ -516,6 +519,12 @@ class VMDKInspector(FileInspector):
     variable number of 512 byte sectors, but is just text defining the
     layout of the disk.
     """
+
+    # The beginning and max size of the descriptor is also hardcoded in Qemu
+    # at 0x200 and 1MB - 1
+    DESC_OFFSET = 0x200
+    DESC_MAX_SIZE = (1 << 20) - 1
+
     def __init__(self, *a, **k):
         super(VMDKInspector, self).__init__(*a, **k)
         self.new_region('header', CaptureRegion(0, 512))
@@ -532,15 +541,22 @@ def post_process(self):
 
         if sig != b'KDMV':
             raise ImageFormatError('Signature KDMV not found: %r' % sig)
-            return
 
         if ver not in (1, 2, 3):
             raise ImageFormatError('Unsupported format version %i' % ver)
-            return
+
+        # Since we parse both desc_sec and desc_num (the location of the
+        # VMDK's descriptor, expressed in 512 bytes sectors) we enforce a
+        # check on the bounds to create a reasonable CaptureRegion. This
+        # is similar to how it's done in qemu.
+        desc_offset = desc_sec * 512
+        desc_size = min(desc_num * 512, self.DESC_MAX_SIZE)
+        if desc_offset != self.DESC_OFFSET:
+            raise ImageFormatError("Wrong descriptor location")
 
         if not self.has_region('descriptor'):
             self.new_region('descriptor', CaptureRegion(
-                desc_sec * 512, desc_num * 512))
+                desc_offset, desc_size))
 
     @property
     def format_match(self):
diff --git a/glance/tests/unit/common/test_format_inspector.py b/glance/tests/unit/common/test_format_inspector.py
@@ -16,6 +16,7 @@
 import io
 import os
 import re
+import struct
 import subprocess
 import tempfile
 from unittest import mock
@@ -63,6 +64,28 @@ def _create_img(self, fmt, size):
             shell=True)
         return fn
 
+    def _create_allocated_vmdk(self, size_mb):
+        # We need a "big" VMDK file to exercise some parts of the code of the
+        # format_inspector. A way to create one is to first create an empty
+        # file, and then to convert it with the -S 0 option.
+        fn = tempfile.mktemp(prefix='glance-unittest-formatinspector-',
+                             suffix='.vmdk')
+        self._created_files.append(fn)
+        zeroes = tempfile.mktemp(prefix='glance-unittest-formatinspector-',
+                                 suffix='.zero')
+        self._created_files.append(zeroes)
+
+        # Create an empty file
+        subprocess.check_output(
+            'dd if=/dev/zero of=%s bs=1M count=%i' % (zeroes, size_mb),
+            shell=True)
+
+        # Convert it to VMDK
+        subprocess.check_output(
+            'qemu-img convert -f raw -O vmdk -S 0 %s %s' % (zeroes, fn),
+            shell=True)
+        return fn
+
     def _test_format_at_block_size(self, format_name, img, block_size):
         fmt = format_inspector.get_inspector(format_name)()
         self.assertIsNotNone(fmt,
@@ -119,6 +142,64 @@ def test_vhdx(self):
     def test_vmdk(self):
         self._test_format('vmdk')
 
+    def test_vmdk_bad_descriptor_offset(self):
+        format_name = 'vmdk'
+        image_size = 10 * units.Mi
+        descriptorOffsetAddr = 0x1c
+        BAD_ADDRESS = 0x400
+        img = self._create_img(format_name, image_size)
+
+        # Corrupt the header
+        fd = open(img, 'r+b')
+        fd.seek(descriptorOffsetAddr)
+        fd.write(struct.pack('<Q', BAD_ADDRESS // 512))
+        fd.close()
+
+        # Read the format in various sizes, some of which will read whole
+        # sections in a single read, others will be completely unaligned, etc.
+        for block_size in (64 * units.Ki, 512, 17, 1 * units.Mi):
+            fmt = self._test_format_at_block_size(format_name, img, block_size)
+            self.assertTrue(fmt.format_match,
+                            'Failed to match %s at size %i block %i' % (
+                                format_name, image_size, block_size))
+            self.assertEqual(0, fmt.virtual_size,
+                             ('Calculated a virtual size for a corrupt %s at '
+                              'size %i block %i') % (format_name, image_size,
+                                                     block_size))
+
+    def test_vmdk_bad_descriptor_mem_limit(self):
+        format_name = 'vmdk'
+        image_size = 5 * units.Mi
+        virtual_size = 5 * units.Mi
+        descriptorOffsetAddr = 0x1c
+        descriptorSizeAddr = descriptorOffsetAddr + 8
+        twoMBInSectors = (2 << 20) // 512
+        # We need a big VMDK because otherwise we will not have enough data to
+        # fill-up the CaptureRegion.
+        img = self._create_allocated_vmdk(image_size // units.Mi)
+
+        # Corrupt the end of descriptor address so it "ends" at 2MB
+        fd = open(img, 'r+b')
+        fd.seek(descriptorSizeAddr)
+        fd.write(struct.pack('<Q', twoMBInSectors))
+        fd.close()
+
+        # Read the format in various sizes, some of which will read whole
+        # sections in a single read, others will be completely unaligned, etc.
+        for block_size in (64 * units.Ki, 512, 17, 1 * units.Mi):
+            fmt = self._test_format_at_block_size(format_name, img, block_size)
+            self.assertTrue(fmt.format_match,
+                            'Failed to match %s at size %i block %i' % (
+                                format_name, image_size, block_size))
+            self.assertEqual(virtual_size, fmt.virtual_size,
+                             ('Failed to calculate size for %s at size %i '
+                              'block %i') % (format_name, image_size,
+                                             block_size))
+            memory = sum(fmt.context_info.values())
+            self.assertLess(memory, 1.5 * units.Mi,
+                            'Format used more than 1.5MiB of memory: %s' % (
+                                fmt.context_info))
+
     def test_vdi(self):
         self._test_format('vdi')
 
@@ -275,3 +356,42 @@ def test_get_inspector(self):
         self.assertEqual(format_inspector.QcowInspector,
                          format_inspector.get_inspector('qcow2'))
         self.assertIsNone(format_inspector.get_inspector('foo'))
+
+
+class TestFormatInspectorsTargeted(test_utils.BaseTestCase):
+    def _make_vhd_meta(self, guid_raw, item_length):
+        # Meta region header, padded to 32 bytes
+        data = struct.pack('<8sHH', b'metadata', 0, 1)
+        data += b'0' * 20
+
+        # Metadata table entry, 16-byte GUID, 12-byte information,
+        # padded to 32-bytes
+        data += guid_raw
+        data += struct.pack('<III', 256, item_length, 0)
+        data += b'0' * 6
+
+        return data
+
+    def test_vhd_table_over_limit(self):
+        ins = format_inspector.VHDXInspector()
+        meta = format_inspector.CaptureRegion(0, 0)
+        desired = b'012345678ABCDEF0'
+        # This is a poorly-crafted image that specifies a larger table size
+        # than is allowed
+        meta.data = self._make_vhd_meta(desired, 33 * 2048)
+        ins.new_region('metadata', meta)
+        new_region = ins._find_meta_entry(ins._guid(desired))
+        # Make sure we clamp to our limit of 32 * 2048
+        self.assertEqual(
+            format_inspector.VHDXInspector.VHDX_METADATA_TABLE_MAX_SIZE,
+            new_region.length)
+
+    def test_vhd_table_under_limit(self):
+        ins = format_inspector.VHDXInspector()
+        meta = format_inspector.CaptureRegion(0, 0)
+        desired = b'012345678ABCDEF0'
+        meta.data = self._make_vhd_meta(desired, 16 * 2048)
+        ins.new_region('metadata', meta)
+        new_region = ins._find_meta_entry(ins._guid(desired))
+        # Table size was under the limit, make sure we get it back
+        self.assertEqual(16 * 2048, new_region.length)