Skip to content

Commit 155a2fb

Browse files
Zuul authored and openstack-gerrit committed
Merge "Add file format detection to format_inspector" into stable/2023.1
2 parents d6ada3f + 1656fe8 commit 155a2fb

File tree

2 files changed

+43
-67
lines changed

2 files changed

+43
-67
lines changed

glance/common/format_inspector.py

Lines changed: 43 additions & 11 deletions
Original file line numberDiff line numberDiff line change
@@ -883,20 +883,52 @@ def close(self):
883883
self._source.close()
884884

885885

886+
# Registry of the supported disk formats: each disk_format name maps to the
# inspector class that is used for it.  get_inspector() looks names up here
# and detect_file_format() instantiates every entry, iterating in this
# order and falling back to the 'raw' entry when nothing else matches.
ALL_FORMATS = {
    'raw': FileInspector,
    'qcow2': QcowInspector,
    'vhd': VHDInspector,
    'vhdx': VHDXInspector,
    'vmdk': VMDKInspector,
    'vdi': VDIInspector,
    'qed': QEDInspector,
}
895+
896+
886897
def get_inspector(format_name):
    """Look up the inspector class registered for a disk format.

    :param format_name: The disk_format name to look up (raw, qcow2, etc).
    :returns: The matching FormatInspector class, or None when the name
              is not present in ALL_FORMATS.
    """
    inspector_cls = ALL_FORMATS.get(format_name)
    return inspector_cls
905+
906+
907+
def detect_file_format(filename):
    """Attempts to detect the format of a file.

    This runs through a file one time, running all the known inspectors in
    parallel. It stops reading the file once one of them matches or all of
    the inspectors still under consideration report that they are complete.

    :param filename: Path of the file to inspect. It is opened read-only in
                     binary mode and closed again before returning.
    :returns: The first inspector other than 'raw' that reports both
              format_match and complete. If no such inspector is found,
              the 'raw' inspector is returned (this function does not
              return None).
    """
    inspectors = {name: cls() for name, cls in ALL_FORMATS.items()}
    with open(filename, 'rb') as f:
        for chunk in chunked_reader(f):
            # Walk a snapshot of the dict because rejected formats are
            # removed from it while we loop.
            for fmt, inspector in list(inspectors.items()):
                try:
                    inspector.eat_chunk(chunk)
                except ImageFormatError:
                    # No match, so stop considering this format
                    inspectors.pop(fmt)
                    continue
                if (inspector.format_match and inspector.complete and
                        fmt != 'raw'):
                    # First complete match (other than raw) wins
                    return inspector
            if all(i.complete for i in inspectors.values()):
                # If all the inspectors are sure they are not a match, avoid
                # reading to the end of the file to settle on 'raw'.
                break
    # NOTE(review): this lookup assumes the 'raw' inspector never raises
    # ImageFormatError from eat_chunk(); if it did, it would have been
    # popped above and this would raise KeyError -- confirm against
    # FileInspector.
    return inspectors['raw']

glance/tests/unit/common/test_format_inspector.py

Lines changed: 0 additions & 56 deletions
Original file line numberDiff line numberDiff line change
@@ -313,62 +313,6 @@ def test_qcow2_feature_flag_checks(self):
313313
data[0x4F] = 0x80
314314
self.assertTrue(inspector.has_unknown_features)
315315

316-
def test_vmdk_safety_checks(self):
    """Run VMDKInspector.safety_check() against crafted descriptors."""
    descriptor = format_inspector.CaptureRegion(0, 0)
    vmdk = format_inspector.VMDKInspector()
    vmdk.new_region('descriptor', descriptor)

    def load(raw):
        # Make the given bytes the entire content of the descriptor region.
        descriptor.data = raw
        descriptor.length = len(descriptor.data)

    # This should be a legit VMDK descriptor with comments, blank lines,
    # an extent, some ddb content, and some header values.
    good_lines = (
        '# This is a comment',
        '',
        ' ',
        'createType=monolithicSparse',
        'RW 1234 SPARSE "foo.vmdk"',
        'ddb.adapterType = "MFM',
        '# EOF',
    )
    load('\n'.join(good_lines).encode('ascii'))
    self.assertTrue(vmdk.safety_check())

    # Any of these lines should trigger an error indicating that there is
    # something in the descriptor we don't understand
    rejected_lines = (
        '#\U0001F4A9',
        'header Name=foo',
        'foo bar',
        'WR 123 SPARSE "foo.vmdk"',
    )
    for rejected in rejected_lines:
        # Encode as UTF-8 purely so we can test that anything non-ASCII
        # will trigger the decode check
        load(rejected.encode('utf-8'))
        with self.assertRaisesRegex(format_inspector.ImageFormatError,
                                    'Invalid VMDK descriptor'):
            vmdk.safety_check()

    # Extents with slashes in the name fail the safety check
    load(b'RW 123 SPARSE "/etc/shadow"')
    self.assertFalse(vmdk.safety_check())

    # A descriptor that specifies no extents fails the safety check
    load(b'# Nothing')
    self.assertFalse(vmdk.safety_check())
361-
362-
def test_vmdk_reject_footer(self):
    """A header whose last field is GD_AT_END must fail post_process()."""
    # Values for the packed 'header' region, in struct-format order; only
    # the final one (GD_AT_END) is what this test is about.
    header_fields = (b'KDMV', 3, 0, 0, 0, 0, 1, 0, 0,
                     format_inspector.VMDKInspector.GD_AT_END)
    packed = struct.pack('<4sIIQQQQIQQ', *header_fields)
    vmdk = format_inspector.VMDKInspector()
    vmdk.region('header').data = packed
    vmdk.region('header').length = len(packed)
    # The raised error is expected to mention the footer.
    with self.assertRaisesRegex(format_inspector.ImageFormatError, 'footer'):
        vmdk.post_process()
371-
372316
def test_vdi(self):
373317
self._test_format('vdi')
374318

0 commit comments

Comments
 (0)