Skip to content

Commit 65f0789

Browse files
committed
Add iso file format inspector
This change includes unit tests for the ISO format inspector using mkisofs to generate the iso files. A test for stashing qcow content in the system_area of an iso file is also included. This change modifies format_inspector.detect_file_format to evaluate all inspectors until they are complete and raise an InvalidDiskInfo exception if multiple formats match. Related-Bug: #2059809 Change-Id: I7e12718fb3e1f77eb8d1cfcb9fa64e8ddeb9e712 (cherry picked from commit b1cc398) (cherry picked from commit eeda7c3) (cherry picked from commit 24628ec)
1 parent 1233d7b commit 65f0789

File tree

4 files changed

+230
-18
lines changed

4 files changed

+230
-18
lines changed

nova/image/format_inspector.py

Lines changed: 104 additions & 5 deletions
Original file line numberDiff line numberDiff line change
@@ -24,6 +24,7 @@
2424
import struct
2525

2626
from oslo_log import log as logging
27+
from oslo_utils import units
2728

2829
LOG = logging.getLogger(__name__)
2930

@@ -843,6 +844,93 @@ def __str__(self):
843844
return 'vdi'
844845

845846

847+
class ISOInspector(FileInspector):
848+
"""ISO 9660 and UDF format
849+
850+
we need to check the first 32KB + descriptor size
851+
to look for the ISO 9660 or UDF signature.
852+
853+
http://wiki.osdev.org/ISO_9660
854+
http://wiki.osdev.org/UDF
855+
mkisofs --help | grep udf
856+
857+
The Universal Disc Format or UDF is the filesystem used on DVDs and
858+
Blu-Ray discs.UDF is an extension of ISO 9660 and shares the same
859+
header structure and initial layout.
860+
861+
Like the CDFS(ISO 9660) file system,
862+
the UDF file system uses a 2048 byte sector size,
863+
and it designates that the first 16 sectors can be used by the OS
864+
to store proprietary data or boot logic.
865+
866+
That means we need to check the first 32KB + descriptor size
867+
to look for the ISO 9660 or UDF signature.
868+
both formats have an extent based layout, so we can't determine
869+
ahead of time where the descriptor will be located.
870+
871+
fortunately, the ISO 9660 and UDF formats have a Primary Volume Descriptor
872+
located at the beginning of the image, which contains the volume size.
873+
874+
"""
875+
876+
def __init__(self, *a, **k):
877+
super(ISOInspector, self).__init__(*a, **k)
878+
self.new_region('system_area', CaptureRegion(0, 32 * units.Ki))
879+
self.new_region('header', CaptureRegion(32 * units.Ki, 2 * units.Ki))
880+
881+
@property
882+
def format_match(self):
883+
if not self.complete:
884+
return False
885+
signature = self.region('header').data[1:6]
886+
assert len(signature) == 5
887+
return signature in (b'CD001', b'NSR02', b'NSR03')
888+
889+
@property
890+
def virtual_size(self):
891+
if not self.complete:
892+
return 0
893+
if not self.format_match:
894+
return 0
895+
896+
# the header size is 2KB or 1 sector
897+
# the first header field is the descriptor type which is 1 byte
898+
# the second field is the standard identifier which is 5 bytes
899+
# the third field is the version which is 1 byte
900+
# the rest of the header contains type specific data is 2041 bytes
901+
# see http://wiki.osdev.org/ISO_9660#The_Primary_Volume_Descriptor
902+
903+
# we need to check that the descriptor type is 1
904+
# to ensure that this is a primary volume descriptor
905+
descriptor_type = self.region('header').data[0]
906+
if descriptor_type != 1:
907+
return 0
908+
# The size in bytes of a logical block is stored at offset 128
909+
# and is 2 bytes long encoded in both little and big endian
910+
# int16_LSB-MSB so the field is 4 bytes long
911+
logical_block_size_data = self.region('header').data[128:132]
912+
assert len(logical_block_size_data) == 4
913+
# given the encoding we only need to read half the field so we
914+
# can use the first 2 bytes which are the little endian part
915+
# this is normally 2048 or 2KB but we need to check as it can be
916+
# different according to the ISO 9660 standard.
917+
logical_block_size, = struct.unpack('<H', logical_block_size_data[:2])
918+
# The volume space size is the total number of logical blocks
919+
# and is stored at offset 80 and is 8 bytes long
920+
# as with the logical block size the field is encoded in both
921+
# little and big endian as an int32_LSB-MSB
922+
volume_space_size_data = self.region('header').data[80:88]
923+
assert len(volume_space_size_data) == 8
924+
# given the encoding we only need to read half the field so we
925+
# can use the first 4 bytes which are the little endian part
926+
volume_space_size, = struct.unpack('<L', volume_space_size_data[:4])
927+
# the virtual size is the volume space size * logical block size
928+
return volume_space_size * logical_block_size
929+
930+
def __str__(self):
931+
return 'iso'
932+
933+
846934
class InfoWrapper(object):
847935
"""A file-like object that wraps another and updates a format inspector.
848936
@@ -896,6 +984,7 @@ def close(self):
896984
'vmdk': VMDKInspector,
897985
'vdi': VDIInspector,
898986
'qed': QEDInspector,
987+
'iso': ISOInspector,
899988
}
900989

901990

@@ -913,12 +1002,15 @@ def detect_file_format(filename):
9131002
"""Attempts to detect the format of a file.
9141003
9151004
This runs through a file one time, running all the known inspectors in
916-
parallel. It stops reading the file once one of them matches or all of
1005+
parallel. It stops reading the file once all of them matches or all of
9171006
them are sure they don't match.
9181007
919-
Returns the FileInspector that matched, if any. None if 'raw'.
1008+
:param filename: The path to the file to inspect.
1009+
:returns: A FormatInspector instance matching the file.
1010+
:raises: ImageFormatError if multiple formats are detected.
9201011
"""
9211012
inspectors = {k: v() for k, v in ALL_FORMATS.items()}
1013+
detections = []
9221014
with open(filename, 'rb') as f:
9231015
for chunk in chunked_reader(f):
9241016
for format, inspector in list(inspectors.items()):
@@ -930,10 +1022,17 @@ def detect_file_format(filename):
9301022
continue
9311023
if (inspector.format_match and inspector.complete and
9321024
format != 'raw'):
933-
# First complete match (other than raw) wins
934-
return inspector
1025+
# record all match (other than raw)
1026+
detections.append(inspector)
1027+
inspectors.pop(format)
9351028
if all(i.complete for i in inspectors.values()):
9361029
# If all the inspectors are sure they are not a match, avoid
9371030
# reading to the end of the file to settle on 'raw'.
9381031
break
939-
return inspectors['raw']
1032+
1033+
if len(detections) > 1:
1034+
all_formats = [str(inspector) for inspector in detections]
1035+
raise ImageFormatError(
1036+
'Multiple formats detected: %s' % ', '.join(all_formats))
1037+
1038+
return inspectors['raw'] if not detections else detections[0]

nova/tests/unit/image/test_format_inspector.py

Lines changed: 93 additions & 13 deletions
Original file line numberDiff line numberDiff line change
@@ -54,7 +54,13 @@ def tearDown(self):
5454
except Exception:
5555
pass
5656

57-
def _create_iso(self, image_size, subformat='iso-9660'):
57+
def _create_iso(self, image_size, subformat='9660'):
58+
"""Create an ISO file of the given size.
59+
60+
:param image_size: The size of the image to create in bytes
61+
:param subformat: The subformat to use, if any
62+
"""
63+
5864
# these tests depend on mkisofs
5965
# being installed and in the path,
6066
# if it is not installed, skip
@@ -86,12 +92,22 @@ def _create_iso(self, image_size, subformat='iso-9660'):
8692
'dd if=/dev/zero of=%s bs=1M count=%i' % (fn, size),
8793
shell=True)
8894
subprocess.check_output(
89-
'%s -o %s -V "TEST" -J -r %s' % (base_cmd, fn, fn),
95+
'%s -V "TEST" -o %s %s' % (base_cmd, fn, fn),
9096
shell=True)
9197
return fn
9298

93-
def _create_img(self, fmt, size, subformat=None, options=None,
94-
backing_file=None):
99+
def _create_img(
100+
self, fmt, size, subformat=None, options=None,
101+
backing_file=None):
102+
"""Create an image file of the given format and size.
103+
104+
:param fmt: The format to create
105+
:param size: The size of the image to create in bytes
106+
:param subformat: The subformat to use, if any
107+
:param options: A dictionary of options to pass to the format
108+
:param backing_file: The backing file to use, if any
109+
"""
110+
95111
if fmt == 'iso':
96112
return self._create_iso(size, subformat)
97113

@@ -177,6 +193,13 @@ def _test_format_at_block_size(self, format_name, img, block_size):
177193

178194
def _test_format_at_image_size(self, format_name, image_size,
179195
subformat=None):
196+
"""Test the format inspector for the given format at the
197+
given image size.
198+
199+
:param format_name: The format to test
200+
:param image_size: The size of the image to create in bytes
201+
:param subformat: The subformat to use, if any
202+
"""
180203
img = self._create_img(format_name, image_size, subformat=subformat)
181204

182205
# Some formats have internal alignment restrictions making this not
@@ -185,7 +208,15 @@ def _test_format_at_image_size(self, format_name, image_size,
185208

186209
# Read the format in various sizes, some of which will read whole
187210
# sections in a single read, others will be completely unaligned, etc.
188-
for block_size in (64 * units.Ki, 512, 17, 1 * units.Mi):
211+
block_sizes = [64 * units.Ki, 1 * units.Mi]
212+
# ISO images have a 32KB system area at the beginning of the image
213+
# as a result reading that in 17 or 512 byte blocks takes too long,
214+
# causing the test to fail. The 64KiB block size is enough to read
215+
# the system area and header in a single read. the 1MiB block size
216+
# adds very little time to the test so we include it.
217+
if format_name != 'iso':
218+
block_sizes.extend([17, 512])
219+
for block_size in block_sizes:
189220
fmt = self._test_format_at_block_size(format_name, img, block_size)
190221
self.assertTrue(fmt.format_match,
191222
'Failed to match %s at size %i block %i' % (
@@ -210,14 +241,63 @@ def test_qcow2(self):
210241
self._test_format('qcow2')
211242

212243
def test_iso_9660(self):
213-
# reproduce iso-9660 format regression
214-
self.assertRaises(
215-
TypeError, self._test_format, 'iso', subformat='iso-9660')
216-
217-
def test_udf(self):
218-
# reproduce udf format regression
219-
self.assertRaises(
220-
TypeError, self._test_format, 'iso', subformat='udf')
244+
self._test_format('iso', subformat='9660')
245+
246+
def test_iso_udf(self):
247+
self._test_format('iso', subformat='udf')
248+
249+
def _generate_bad_iso(self):
250+
# we want to emulate a malicious user who uploads a an
251+
# ISO file has a qcow2 header in the system area
252+
# of the ISO file
253+
# we will create a qcow2 image and an ISO file
254+
# and then copy the qcow2 header to the ISO file
255+
# e.g.
256+
# mkisofs -o orig.iso /etc/resolv.conf
257+
# qemu-img create orig.qcow2 -f qcow2 64M
258+
# dd if=orig.qcow2 of=outcome bs=32K count=1
259+
# dd if=orig.iso of=outcome bs=32K skip=1 seek=1
260+
261+
qcow = self._create_img('qcow2', 10 * units.Mi)
262+
iso = self._create_iso(64 * units.Mi, subformat='9660')
263+
# first ensure the files are valid
264+
iso_fmt = self._test_format_at_block_size('iso', iso, 4 * units.Ki)
265+
self.assertTrue(iso_fmt.format_match)
266+
qcow_fmt = self._test_format_at_block_size('qcow2', qcow, 4 * units.Ki)
267+
self.assertTrue(qcow_fmt.format_match)
268+
# now copy the qcow2 header to an ISO file
269+
prefix = TEST_IMAGE_PREFIX
270+
prefix += '-bad-'
271+
fn = tempfile.mktemp(prefix=prefix, suffix='.iso')
272+
self._created_files.append(fn)
273+
subprocess.check_output(
274+
'dd if=%s of=%s bs=32K count=1' % (qcow, fn),
275+
shell=True)
276+
subprocess.check_output(
277+
'dd if=%s of=%s bs=32K skip=1 seek=1' % (iso, fn),
278+
shell=True)
279+
return qcow, iso, fn
280+
281+
def test_bad_iso_qcow2(self):
282+
283+
_, _, fn = self._generate_bad_iso()
284+
285+
iso_check = self._test_format_at_block_size('iso', fn, 4 * units.Ki)
286+
qcow_check = self._test_format_at_block_size('qcow2', fn, 4 * units.Ki)
287+
# this system area of the ISO file is not considered part of the format
288+
# the qcow2 header is in the system area of the ISO file
289+
# so the ISO file is still valid
290+
self.assertTrue(iso_check.format_match)
291+
# the qcow2 header is in the system area of the ISO file
292+
# but that will be parsed by the qcow2 format inspector
293+
# and it will match
294+
self.assertTrue(qcow_check.format_match)
295+
# if we call format_inspector.detect_file_format it should detect
296+
# and raise an exception because both match internally.
297+
e = self.assertRaises(
298+
format_inspector.ImageFormatError,
299+
format_inspector.detect_file_format, fn)
300+
self.assertIn('Multiple formats detected', str(e))
221301

222302
def test_vhd(self):
223303
self._test_format('vhd')

nova/tests/unit/virt/test_images.py

Lines changed: 28 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -235,6 +235,34 @@ def test_fetch_checks_vmdk_rules(self, mock_info, mock_fetch, mock_gi,
235235
images.fetch_to_raw, None, 'foo', 'anypath')
236236
self.assertIn('Invalid VMDK create-type specified', str(e))
237237

238+
@mock.patch('os.rename')
239+
@mock.patch.object(images, 'IMAGE_API')
240+
@mock.patch('nova.image.format_inspector.get_inspector')
241+
@mock.patch.object(images, 'fetch')
242+
@mock.patch('nova.privsep.qemu.unprivileged_qemu_img_info')
243+
def test_fetch_iso_is_raw(self, mock_info, mock_fetch, mock_gi,
244+
mock_glance, mock_rename):
245+
mock_glance.get.return_value = {'disk_format': 'iso'}
246+
inspector = mock_gi.return_value.from_file.return_value
247+
inspector.safety_check.return_value = True
248+
# qemu-img does not have a parser for iso so it is treated as raw
249+
info = {
250+
"virtual-size": 356352,
251+
"filename": "foo.iso",
252+
"format": "raw",
253+
"actual-size": 356352,
254+
"dirty-flag": False
255+
}
256+
mock_info.return_value = jsonutils.dumps(info)
257+
with mock.patch('os.path.exists', return_value=True):
258+
images.fetch_to_raw(None, 'foo', 'anypath')
259+
# Make sure we called info with -f raw for an iso, since qemu-img does
260+
# not support iso
261+
mock_info.assert_called_once_with('anypath.part', format='raw')
262+
# Make sure that since we considered this to be a raw file, we did the
263+
# just-rename-don't-convert path
264+
mock_rename.assert_called_once_with('anypath.part', 'anypath')
265+
238266
@mock.patch.object(images, 'IMAGE_API')
239267
@mock.patch('nova.image.format_inspector.get_inspector')
240268
@mock.patch.object(images, 'qemu_img_info')

nova/virt/images.py

Lines changed: 5 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -171,6 +171,11 @@ def do_image_deep_inspection(img, image_href, path):
171171
raise exception.ImageUnacceptable(
172172
image_id=image_href,
173173
reason=_('Image not in a supported format'))
174+
175+
if disk_format == 'iso':
176+
# ISO image passed safety check; qemu will treat this as raw from here
177+
disk_format = 'raw'
178+
174179
return disk_format
175180

176181

0 commit comments

Comments
 (0)