2424import struct
2525
2626from oslo_log import log as logging
27+ from oslo_utils import units
2728
2829LOG = logging .getLogger (__name__ )
2930
@@ -843,6 +844,93 @@ def __str__(self):
843844 return 'vdi'
844845
845846
class ISOInspector(FileInspector):
    """Inspector for ISO 9660 and UDF optical disc images.

    The Universal Disc Format (UDF) is the filesystem used on DVDs and
    Blu-Ray discs. UDF is an extension of ISO 9660 and shares the same
    header structure and initial layout.

    Like ISO 9660 (CDFS), UDF uses a 2048 byte sector size and leaves the
    first 16 sectors to the OS for proprietary data or boot logic. The
    signature therefore cannot be found before offset 32KB, so this
    inspector captures the 32KB system area plus one 2KB descriptor sector.

    Both formats use an extent based layout, so descriptor positions are
    not known ahead of time in general. The Primary Volume Descriptor is
    nevertheless located at the beginning of the image and contains the
    volume size, which is what ``virtual_size`` reports.

    References:
      http://wiki.osdev.org/ISO_9660
      http://wiki.osdev.org/UDF
      mkisofs --help | grep udf
    """

    def __init__(self, *a, **k):
        super(ISOInspector, self).__init__(*a, **k)
        # 16 sectors of 2KB are reserved ahead of the first descriptor.
        system_area_size = 32 * units.Ki
        # A volume descriptor occupies exactly one 2KB sector.
        descriptor_size = 2 * units.Ki
        self.new_region('system_area', CaptureRegion(0, system_area_size))
        self.new_region('header',
                        CaptureRegion(system_area_size, descriptor_size))

    @property
    def format_match(self):
        """Whether the captured descriptor carries an ISO/UDF identifier.

        Always False until every capture region has been filled.
        """
        if self.complete:
            # The 5 byte standard identifier follows the 1 byte type field.
            identifier = self.region('header').data[1:6]
            assert len(identifier) == 5
            return identifier in (b'CD001', b'NSR02', b'NSR03')
        return False

    @property
    def virtual_size(self):
        """Volume size in bytes, or 0 when it cannot be determined.

        0 is returned when the capture is incomplete, when the signature
        does not match, or when the captured descriptor is not a Primary
        Volume Descriptor.
        """
        if not (self.complete and self.format_match):
            return 0

        # Layout of the 2KB descriptor sector, see
        # http://wiki.osdev.org/ISO_9660#The_Primary_Volume_Descriptor
        #   byte 0      descriptor type
        #   bytes 1-5   standard identifier
        #   byte 6      version
        #   bytes 7-... type specific data (2041 bytes)
        descriptor = self.region('header').data

        # Only a Primary Volume Descriptor (type 1) holds the size fields.
        if descriptor[0] != 1:
            return 0

        # Logical block size: int16_LSB-MSB at offset 128, i.e. 4 bytes
        # holding the same value little endian first, then big endian.
        # Usually 2048, but the standard allows other values so read it.
        block_size_field = descriptor[128:132]
        assert len(block_size_field) == 4
        # The little endian half alone is enough.
        (block_size,) = struct.unpack('<H', block_size_field[:2])

        # Volume space size (number of logical blocks): int32_LSB-MSB at
        # offset 80, 8 bytes with the same both-endian encoding.
        block_count_field = descriptor[80:88]
        assert len(block_count_field) == 8
        (block_count,) = struct.unpack('<L', block_count_field[:4])

        return block_count * block_size

    def __str__(self):
        return 'iso'
932+
933+
846934class InfoWrapper (object ):
847935 """A file-like object that wraps another and updates a format inspector.
848936
@@ -896,6 +984,7 @@ def close(self):
896984 'vmdk' : VMDKInspector ,
897985 'vdi' : VDIInspector ,
898986 'qed' : QEDInspector ,
987+ 'iso' : ISOInspector ,
899988}
900989
901990
@@ -913,12 +1002,15 @@ def detect_file_format(filename):
9131002 """Attempts to detect the format of a file.
9141003
9151004 This runs through a file one time, running all the known inspectors in
916- parallel. It stops reading the file once one of them matches or all of
1005+ parallel. It stops reading the file once all of them match or all of
9171006 them are sure they don't match.
9181007
919- Returns the FileInspector that matched, if any. None if 'raw'.
1008+ :param filename: The path to the file to inspect.
1009+ :returns: A FileInspector instance matching the file.
1010+ :raises: ImageFormatError if multiple formats are detected.
9201011 """
9211012 inspectors = {k : v () for k , v in ALL_FORMATS .items ()}
1013+ detections = []
9221014 with open (filename , 'rb' ) as f :
9231015 for chunk in chunked_reader (f ):
9241016 for format , inspector in list (inspectors .items ()):
@@ -930,10 +1022,17 @@ def detect_file_format(filename):
9301022 continue
9311023 if (inspector .format_match and inspector .complete and
9321024 format != 'raw' ):
933- # First complete match (other than raw) wins
934- return inspector
1025+ # record all match (other than raw)
1026+ detections .append (inspector )
1027+ inspectors .pop (format )
9351028 if all (i .complete for i in inspectors .values ()):
9361029 # If all the inspectors are sure they are not a match, avoid
9371030 # reading to the end of the file to settle on 'raw'.
9381031 break
939- return inspectors ['raw' ]
1032+
1033+ if len (detections ) > 1 :
1034+ all_formats = [str (inspector ) for inspector in detections ]
1035+ raise ImageFormatError (
1036+ 'Multiple formats detected: %s' % ', ' .join (all_formats ))
1037+
1038+ return inspectors ['raw' ] if not detections else detections [0 ]
0 commit comments