Skip to content

Commit e8f0061

Browse files
SeanMooneyElod Illes
authored andcommitted
Add iso file format inspector
This change includes unit tests for the ISO format inspector using mkisofs to generate the iso files. A test for stashing qcow content in the system_area of an iso file is also included. This change modifies format_inspector.detect_file_format to evaluate all inspectors until they are complete and raise an InvalidDiskInfo exception if multiple formats match. Related-Bug: #2059809 Change-Id: I7e12718fb3e1f77eb8d1cfcb9fa64e8ddeb9e712 (cherry picked from commit b1cc398) (cherry picked from commit eeda7c3) (cherry picked from commit 24628ec) (cherry picked from commit 65f0789)
1 parent fb86ca6 commit e8f0061

File tree

4 files changed

+230
-18
lines changed

4 files changed

+230
-18
lines changed

nova/image/format_inspector.py

Lines changed: 104 additions & 5 deletions
Original file line numberDiff line numberDiff line change
@@ -24,6 +24,7 @@
2424
import struct
2525

2626
from oslo_log import log as logging
27+
from oslo_utils import units
2728

2829
LOG = logging.getLogger(__name__)
2930

@@ -843,6 +844,93 @@ def __str__(self):
843844
return 'vdi'
844845

845846

847+
class ISOInspector(FileInspector):
848+
"""ISO 9660 and UDF format
849+
850+
we need to check the first 32KB + descriptor size
851+
to look for the ISO 9660 or UDF signature.
852+
853+
http://wiki.osdev.org/ISO_9660
854+
http://wiki.osdev.org/UDF
855+
mkisofs --help | grep udf
856+
857+
The Universal Disc Format or UDF is the filesystem used on DVDs and
858+
Blu-Ray discs.UDF is an extension of ISO 9660 and shares the same
859+
header structure and initial layout.
860+
861+
Like the CDFS(ISO 9660) file system,
862+
the UDF file system uses a 2048 byte sector size,
863+
and it designates that the first 16 sectors can be used by the OS
864+
to store proprietary data or boot logic.
865+
866+
That means we need to check the first 32KB + descriptor size
867+
to look for the ISO 9660 or UDF signature.
868+
both formats have an extent based layout, so we can't determine
869+
ahead of time where the descriptor will be located.
870+
871+
fortunately, the ISO 9660 and UDF formats have a Primary Volume Descriptor
872+
located at the beginning of the image, which contains the volume size.
873+
874+
"""
875+
876+
def __init__(self, *a, **k):
877+
super(ISOInspector, self).__init__(*a, **k)
878+
self.new_region('system_area', CaptureRegion(0, 32 * units.Ki))
879+
self.new_region('header', CaptureRegion(32 * units.Ki, 2 * units.Ki))
880+
881+
@property
882+
def format_match(self):
883+
if not self.complete:
884+
return False
885+
signature = self.region('header').data[1:6]
886+
assert len(signature) == 5
887+
return signature in (b'CD001', b'NSR02', b'NSR03')
888+
889+
@property
890+
def virtual_size(self):
891+
if not self.complete:
892+
return 0
893+
if not self.format_match:
894+
return 0
895+
896+
# the header size is 2KB or 1 sector
897+
# the first header field is the descriptor type which is 1 byte
898+
# the second field is the standard identifier which is 5 bytes
899+
# the third field is the version which is 1 byte
900+
# the rest of the header contains type specific data is 2041 bytes
901+
# see http://wiki.osdev.org/ISO_9660#The_Primary_Volume_Descriptor
902+
903+
# we need to check that the descriptor type is 1
904+
# to ensure that this is a primary volume descriptor
905+
descriptor_type = self.region('header').data[0]
906+
if descriptor_type != 1:
907+
return 0
908+
# The size in bytes of a logical block is stored at offset 128
909+
# and is 2 bytes long encoded in both little and big endian
910+
# int16_LSB-MSB so the field is 4 bytes long
911+
logical_block_size_data = self.region('header').data[128:132]
912+
assert len(logical_block_size_data) == 4
913+
# given the encoding we only need to read half the field so we
914+
# can use the first 2 bytes which are the little endian part
915+
# this is normally 2048 or 2KB but we need to check as it can be
916+
# different according to the ISO 9660 standard.
917+
logical_block_size, = struct.unpack('<H', logical_block_size_data[:2])
918+
# The volume space size is the total number of logical blocks
919+
# and is stored at offset 80 and is 8 bytes long
920+
# as with the logical block size the field is encoded in both
921+
# little and big endian as an int32_LSB-MSB
922+
volume_space_size_data = self.region('header').data[80:88]
923+
assert len(volume_space_size_data) == 8
924+
# given the encoding we only need to read half the field so we
925+
# can use the first 4 bytes which are the little endian part
926+
volume_space_size, = struct.unpack('<L', volume_space_size_data[:4])
927+
# the virtual size is the volume space size * logical block size
928+
return volume_space_size * logical_block_size
929+
930+
def __str__(self):
931+
return 'iso'
932+
933+
846934
class InfoWrapper(object):
847935
"""A file-like object that wraps another and updates a format inspector.
848936
@@ -896,6 +984,7 @@ def close(self):
896984
'vmdk': VMDKInspector,
897985
'vdi': VDIInspector,
898986
'qed': QEDInspector,
987+
'iso': ISOInspector,
899988
}
900989

901990

@@ -913,12 +1002,15 @@ def detect_file_format(filename):
9131002
"""Attempts to detect the format of a file.
9141003
9151004
This runs through a file one time, running all the known inspectors in
916-
parallel. It stops reading the file once one of them matches or all of
1005+
parallel. It stops reading the file once all of them matches or all of
9171006
them are sure they don't match.
9181007
919-
Returns the FileInspector that matched, if any. None if 'raw'.
1008+
:param filename: The path to the file to inspect.
1009+
:returns: A FormatInspector instance matching the file.
1010+
:raises: ImageFormatError if multiple formats are detected.
9201011
"""
9211012
inspectors = {k: v() for k, v in ALL_FORMATS.items()}
1013+
detections = []
9221014
with open(filename, 'rb') as f:
9231015
for chunk in chunked_reader(f):
9241016
for format, inspector in list(inspectors.items()):
@@ -930,10 +1022,17 @@ def detect_file_format(filename):
9301022
continue
9311023
if (inspector.format_match and inspector.complete and
9321024
format != 'raw'):
933-
# First complete match (other than raw) wins
934-
return inspector
1025+
# record all match (other than raw)
1026+
detections.append(inspector)
1027+
inspectors.pop(format)
9351028
if all(i.complete for i in inspectors.values()):
9361029
# If all the inspectors are sure they are not a match, avoid
9371030
# reading to the end of the file to settle on 'raw'.
9381031
break
939-
return inspectors['raw']
1032+
1033+
if len(detections) > 1:
1034+
all_formats = [str(inspector) for inspector in detections]
1035+
raise ImageFormatError(
1036+
'Multiple formats detected: %s' % ', '.join(all_formats))
1037+
1038+
return inspectors['raw'] if not detections else detections[0]

nova/tests/unit/image/test_format_inspector.py

Lines changed: 93 additions & 13 deletions
Original file line numberDiff line numberDiff line change
@@ -54,7 +54,13 @@ def tearDown(self):
5454
except Exception:
5555
pass
5656

57-
def _create_iso(self, image_size, subformat='iso-9660'):
57+
def _create_iso(self, image_size, subformat='9660'):
58+
"""Create an ISO file of the given size.
59+
60+
:param image_size: The size of the image to create in bytes
61+
:param subformat: The subformat to use, if any
62+
"""
63+
5864
# these tests depend on mkisofs
5965
# being installed and in the path,
6066
# if it is not installed, skip
@@ -86,12 +92,22 @@ def _create_iso(self, image_size, subformat='iso-9660'):
8692
'dd if=/dev/zero of=%s bs=1M count=%i' % (fn, size),
8793
shell=True)
8894
subprocess.check_output(
89-
'%s -o %s -V "TEST" -J -r %s' % (base_cmd, fn, fn),
95+
'%s -V "TEST" -o %s %s' % (base_cmd, fn, fn),
9096
shell=True)
9197
return fn
9298

93-
def _create_img(self, fmt, size, subformat=None, options=None,
94-
backing_file=None):
99+
def _create_img(
100+
self, fmt, size, subformat=None, options=None,
101+
backing_file=None):
102+
"""Create an image file of the given format and size.
103+
104+
:param fmt: The format to create
105+
:param size: The size of the image to create in bytes
106+
:param subformat: The subformat to use, if any
107+
:param options: A dictionary of options to pass to the format
108+
:param backing_file: The backing file to use, if any
109+
"""
110+
95111
if fmt == 'iso':
96112
return self._create_iso(size, subformat)
97113

@@ -177,6 +193,13 @@ def _test_format_at_block_size(self, format_name, img, block_size):
177193

178194
def _test_format_at_image_size(self, format_name, image_size,
179195
subformat=None):
196+
"""Test the format inspector for the given format at the
197+
given image size.
198+
199+
:param format_name: The format to test
200+
:param image_size: The size of the image to create in bytes
201+
:param subformat: The subformat to use, if any
202+
"""
180203
img = self._create_img(format_name, image_size, subformat=subformat)
181204

182205
# Some formats have internal alignment restrictions making this not
@@ -185,7 +208,15 @@ def _test_format_at_image_size(self, format_name, image_size,
185208

186209
# Read the format in various sizes, some of which will read whole
187210
# sections in a single read, others will be completely unaligned, etc.
188-
for block_size in (64 * units.Ki, 512, 17, 1 * units.Mi):
211+
block_sizes = [64 * units.Ki, 1 * units.Mi]
212+
# ISO images have a 32KB system area at the beginning of the image
213+
# as a result reading that in 17 or 512 byte blocks takes too long,
214+
# causing the test to fail. The 64KiB block size is enough to read
215+
# the system area and header in a single read. the 1MiB block size
216+
# adds very little time to the test so we include it.
217+
if format_name != 'iso':
218+
block_sizes.extend([17, 512])
219+
for block_size in block_sizes:
189220
fmt = self._test_format_at_block_size(format_name, img, block_size)
190221
self.assertTrue(fmt.format_match,
191222
'Failed to match %s at size %i block %i' % (
@@ -210,14 +241,63 @@ def test_qcow2(self):
210241
self._test_format('qcow2')
211242

212243
def test_iso_9660(self):
213-
# reproduce iso-9660 format regression
214-
self.assertRaises(
215-
TypeError, self._test_format, 'iso', subformat='iso-9660')
216-
217-
def test_udf(self):
218-
# reproduce udf format regression
219-
self.assertRaises(
220-
TypeError, self._test_format, 'iso', subformat='udf')
244+
self._test_format('iso', subformat='9660')
245+
246+
def test_iso_udf(self):
247+
self._test_format('iso', subformat='udf')
248+
249+
def _generate_bad_iso(self):
250+
# we want to emulate a malicious user who uploads a an
251+
# ISO file has a qcow2 header in the system area
252+
# of the ISO file
253+
# we will create a qcow2 image and an ISO file
254+
# and then copy the qcow2 header to the ISO file
255+
# e.g.
256+
# mkisofs -o orig.iso /etc/resolv.conf
257+
# qemu-img create orig.qcow2 -f qcow2 64M
258+
# dd if=orig.qcow2 of=outcome bs=32K count=1
259+
# dd if=orig.iso of=outcome bs=32K skip=1 seek=1
260+
261+
qcow = self._create_img('qcow2', 10 * units.Mi)
262+
iso = self._create_iso(64 * units.Mi, subformat='9660')
263+
# first ensure the files are valid
264+
iso_fmt = self._test_format_at_block_size('iso', iso, 4 * units.Ki)
265+
self.assertTrue(iso_fmt.format_match)
266+
qcow_fmt = self._test_format_at_block_size('qcow2', qcow, 4 * units.Ki)
267+
self.assertTrue(qcow_fmt.format_match)
268+
# now copy the qcow2 header to an ISO file
269+
prefix = TEST_IMAGE_PREFIX
270+
prefix += '-bad-'
271+
fn = tempfile.mktemp(prefix=prefix, suffix='.iso')
272+
self._created_files.append(fn)
273+
subprocess.check_output(
274+
'dd if=%s of=%s bs=32K count=1' % (qcow, fn),
275+
shell=True)
276+
subprocess.check_output(
277+
'dd if=%s of=%s bs=32K skip=1 seek=1' % (iso, fn),
278+
shell=True)
279+
return qcow, iso, fn
280+
281+
def test_bad_iso_qcow2(self):
282+
283+
_, _, fn = self._generate_bad_iso()
284+
285+
iso_check = self._test_format_at_block_size('iso', fn, 4 * units.Ki)
286+
qcow_check = self._test_format_at_block_size('qcow2', fn, 4 * units.Ki)
287+
# this system area of the ISO file is not considered part of the format
288+
# the qcow2 header is in the system area of the ISO file
289+
# so the ISO file is still valid
290+
self.assertTrue(iso_check.format_match)
291+
# the qcow2 header is in the system area of the ISO file
292+
# but that will be parsed by the qcow2 format inspector
293+
# and it will match
294+
self.assertTrue(qcow_check.format_match)
295+
# if we call format_inspector.detect_file_format it should detect
296+
# and raise an exception because both match internally.
297+
e = self.assertRaises(
298+
format_inspector.ImageFormatError,
299+
format_inspector.detect_file_format, fn)
300+
self.assertIn('Multiple formats detected', str(e))
221301

222302
def test_vhd(self):
223303
self._test_format('vhd')

nova/tests/unit/virt/test_images.py

Lines changed: 28 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -235,6 +235,34 @@ def test_fetch_checks_vmdk_rules(self, mock_info, mock_fetch, mock_gi,
235235
images.fetch_to_raw, None, 'foo', 'anypath')
236236
self.assertIn('Invalid VMDK create-type specified', str(e))
237237

238+
@mock.patch('os.rename')
239+
@mock.patch.object(images, 'IMAGE_API')
240+
@mock.patch('nova.image.format_inspector.get_inspector')
241+
@mock.patch.object(images, 'fetch')
242+
@mock.patch('nova.privsep.qemu.unprivileged_qemu_img_info')
243+
def test_fetch_iso_is_raw(self, mock_info, mock_fetch, mock_gi,
244+
mock_glance, mock_rename):
245+
mock_glance.get.return_value = {'disk_format': 'iso'}
246+
inspector = mock_gi.return_value.from_file.return_value
247+
inspector.safety_check.return_value = True
248+
# qemu-img does not have a parser for iso so it is treated as raw
249+
info = {
250+
"virtual-size": 356352,
251+
"filename": "foo.iso",
252+
"format": "raw",
253+
"actual-size": 356352,
254+
"dirty-flag": False
255+
}
256+
mock_info.return_value = jsonutils.dumps(info)
257+
with mock.patch('os.path.exists', return_value=True):
258+
images.fetch_to_raw(None, 'foo', 'anypath')
259+
# Make sure we called info with -f raw for an iso, since qemu-img does
260+
# not support iso
261+
mock_info.assert_called_once_with('anypath.part', format='raw')
262+
# Make sure that since we considered this to be a raw file, we did the
263+
# just-rename-don't-convert path
264+
mock_rename.assert_called_once_with('anypath.part', 'anypath')
265+
238266
@mock.patch.object(images, 'IMAGE_API')
239267
@mock.patch('nova.image.format_inspector.get_inspector')
240268
@mock.patch.object(images, 'qemu_img_info')

nova/virt/images.py

Lines changed: 5 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -171,6 +171,11 @@ def do_image_deep_inspection(img, image_href, path):
171171
raise exception.ImageUnacceptable(
172172
image_id=image_href,
173173
reason=_('Image not in a supported format'))
174+
175+
if disk_format == 'iso':
176+
# ISO image passed safety check; qemu will treat this as raw from here
177+
disk_format = 'raw'
178+
174179
return disk_format
175180

176181

0 commit comments

Comments
 (0)