Skip to content

Commit 28cac20

Browse files
kk7dselajkat
authored and committed
Add file format detection to format_inspector
Change-Id: If0a4251465507be035ffaf9d855299611637cfa9 (cherry picked from commit 79271eaa5c742a1741321198c43807857fb6ed94) (cherry picked from commit e1c36248c7660dea1bedfa8f1c0711a4b97d279c) (cherry picked from commit d54121d6a937fd50aae1018aede228a3c0985dce) (cherry picked from commit 1656fe8)
1 parent 0ff71aa commit 28cac20

File tree

2 files changed

+43
-67
lines changed

2 files changed

+43
-67
lines changed

glance/common/format_inspector.py

Lines changed: 43 additions & 11 deletions
Original file line number | Diff line number | Diff line change
@@ -883,20 +883,52 @@ def close(self):
883883
self._source.close()
884884

885885

886+
ALL_FORMATS = {
887+
'raw': FileInspector,
888+
'qcow2': QcowInspector,
889+
'vhd': VHDInspector,
890+
'vhdx': VHDXInspector,
891+
'vmdk': VMDKInspector,
892+
'vdi': VDIInspector,
893+
'qed': QEDInspector,
894+
}
895+
896+
886897
def get_inspector(format_name):
887898
"""Returns a FormatInspector class based on the given name.
888899
889900
:param format_name: The name of the disk_format (raw, qcow2, etc).
890901
:returns: A FormatInspector or None if unsupported.
891902
"""
892-
formats = {
893-
'raw': FileInspector,
894-
'qcow2': QcowInspector,
895-
'vhd': VHDInspector,
896-
'vhdx': VHDXInspector,
897-
'vmdk': VMDKInspector,
898-
'vdi': VDIInspector,
899-
'qed': QEDInspector,
900-
}
901-
902-
return formats.get(format_name)
903+
904+
return ALL_FORMATS.get(format_name)
905+
906+
907+
def detect_file_format(filename):
908+
"""Attempts to detect the format of a file.
909+
910+
This runs through a file one time, running all the known inspectors in
911+
parallel. It stops reading the file once one of them matches or all of
912+
them are sure they don't match.
913+
914+
Returns the inspector that matched, if any; otherwise the 'raw' inspector.
915+
"""
916+
inspectors = {k: v() for k, v in ALL_FORMATS.items()}
917+
with open(filename, 'rb') as f:
918+
for chunk in chunked_reader(f):
919+
for format, inspector in list(inspectors.items()):
920+
try:
921+
inspector.eat_chunk(chunk)
922+
except ImageFormatError:
923+
# No match, so stop considering this format
924+
inspectors.pop(format)
925+
continue
926+
if (inspector.format_match and inspector.complete and
927+
format != 'raw'):
928+
# First complete match (other than raw) wins
929+
return inspector
930+
if all(i.complete for i in inspectors.values()):
931+
# If all the inspectors are sure they are not a match, avoid
932+
# reading to the end of the file to settle on 'raw'.
933+
break
934+
return inspectors['raw']

glance/tests/unit/common/test_format_inspector.py

Lines changed: 0 additions & 56 deletions
Original file line number | Diff line number | Diff line change
@@ -315,62 +315,6 @@ def test_qcow2_feature_flag_checks(self):
315315
data[0x4F] = 0x80
316316
self.assertTrue(inspector.has_unknown_features)
317317

318-
def test_vmdk_safety_checks(self):
319-
region = format_inspector.CaptureRegion(0, 0)
320-
inspector = format_inspector.VMDKInspector()
321-
inspector.new_region('descriptor', region)
322-
323-
# This should be a legit VMDK descriptor with comments, blank lines,
324-
# an extent, some ddb content, and some header values.
325-
legit_desc = ['# This is a comment',
326-
'',
327-
' ',
328-
'createType=monolithicSparse',
329-
'RW 1234 SPARSE "foo.vmdk"',
330-
'ddb.adapterType = "MFM',
331-
'# EOF']
332-
region.data = ('\n'.join(legit_desc)).encode('ascii')
333-
region.length = len(region.data)
334-
self.assertTrue(inspector.safety_check())
335-
336-
# Any of these lines should trigger an error indicating that there is
337-
# something in the descriptor we don't understand
338-
bad_lines = [
339-
'#\U0001F4A9',
340-
'header Name=foo',
341-
'foo bar',
342-
'WR 123 SPARSE "foo.vmdk"',
343-
]
344-
345-
for bad_line in bad_lines:
346-
# Encode as UTF-8 purely so we can test that anything non-ASCII
347-
# will trigger the decode check
348-
region.data = bad_line.encode('utf-8')
349-
region.length = len(region.data)
350-
self.assertRaisesRegex(format_inspector.ImageFormatError,
351-
'Invalid VMDK descriptor',
352-
inspector.safety_check)
353-
354-
# Extents with slashes in the name fail the safety check
355-
region.data = b'RW 123 SPARSE "/etc/shadow"'
356-
region.length = len(region.data)
357-
self.assertFalse(inspector.safety_check())
358-
359-
# A descriptor that specifies no extents fails the safety check
360-
region.data = b'# Nothing'
361-
region.length = len(region.data)
362-
self.assertFalse(inspector.safety_check())
363-
364-
def test_vmdk_reject_footer(self):
365-
data = struct.pack('<4sIIQQQQIQQ', b'KDMV', 3, 0, 0, 0, 0, 1, 0, 0,
366-
format_inspector.VMDKInspector.GD_AT_END)
367-
inspector = format_inspector.VMDKInspector()
368-
inspector.region('header').data = data
369-
inspector.region('header').length = len(data)
370-
self.assertRaisesRegex(format_inspector.ImageFormatError,
371-
'footer',
372-
inspector.post_process)
373-
374318
def test_vdi(self):
375319
self._test_format('vdi')
376320

0 commit comments

Comments
 (0)