28
28
LOG = logging .getLogger (__name__ )
29
29
30
30
31
def chunked_reader(fileobj, chunk_size=512):
    """Yield successive chunks read from a file-like object.

    :param fileobj: Object providing a ``read(size)`` method.
    :param chunk_size: Amount of data requested from each ``read()`` call.
    :returns: Generator of the values returned by ``fileobj.read()``;
              iteration stops at the first empty (falsy) read.
    """
    chunk = fileobj.read(chunk_size)
    while chunk:
        yield chunk
        chunk = fileobj.read(chunk_size)
37
+
38
+
31
39
class CaptureRegion (object ):
32
40
"""Represents a region of a file we want to capture.
33
41
@@ -176,10 +184,16 @@ def virtual_size(self):
176
184
@property
def actual_size(self):
    """Return the total size of the file as counted by this inspector.

    This is usually smaller than virtual_size.

    NOTE: the value is only accurate if the entire file has been read
    and processed.
    """
    return self._total_count
182
191
192
@property
def complete(self):
    """Return True when every capture region reports itself complete."""
    for region in self._capture_regions.values():
        if not region.complete:
            # At least one region still needs data
            return False
    return True
196
+
183
197
def __str__(self):
    """Return the short string name of this file format."""
    format_name = 'raw'
    return format_name
@@ -194,6 +208,35 @@ def context_info(self):
194
208
return {name : len (region .data ) for name , region in
195
209
self ._capture_regions .items ()}
196
210
211
@classmethod
def from_file(cls, filename):
    """Build an inspector by reading only as much of a file as needed.

    NOTE: Reading stops as soon as the inspector is complete, so the
    actual_size property reflects the amount of data consumed up to that
    point rather than the size of the file.

    :param filename: Path of the file to open (in binary mode).
    :returns: The populated inspector instance.
    :raises ImageFormatError: if the inspector never completes or the
                              data does not match this format.
    """
    inspector = cls()
    with open(filename, 'rb') as image:
        for data in chunked_reader(image):
            inspector.eat_chunk(data)
            if inspector.complete:
                # Everything we need has been captured; stop reading
                break
    if inspector.complete and inspector.format_match:
        return inspector
    raise ImageFormatError('File is not in requested format')
231
+
232
def safety_check(self):
    """Check whether this file is considered safe.

    This implementation performs no checks and always reports the file
    as safe.

    :returns: True if safe, False otherwise.
    :raises ImageFormatError: may be raised if safety cannot be
                              guaranteed because of parsing or other
                              errors.
    """
    is_safe = True
    return is_safe
239
+
197
240
198
241
# The qcow2 format consists of a big-endian 72-byte header, of which
199
242
# only a small portion has information we care about:
@@ -202,15 +245,26 @@ def context_info(self):
202
245
# 0 0x00 Magic 4-bytes 'QFI\xfb'
203
246
# 4 0x04 Version (uint32_t, should always be 2 for modern files)
204
247
# . . .
248
+ # 8 0x08 Backing file offset (uint64_t)
205
249
# 24 0x18 Size in bytes (uint64_t)
250
+ # . . .
251
+ # 72 0x48 Incompatible features bitfield (8 bytes)
206
252
#
207
- # https://people.gnome.org/~markmc/qcow-image-format.html
253
+ # https://gitlab.com/qemu-project/qemu/-/blob/master/docs/interop/qcow2.txt
208
254
class QcowInspector (FileInspector ):
209
255
"""QEMU QCOW2 Format
210
256
211
257
This should only require about 32 bytes of the beginning of the file
212
- to determine the virtual size.
258
+ to determine the virtual size, and 104 bytes to perform the safety check .
213
259
"""
260
+
261
+ BF_OFFSET = 0x08
262
+ BF_OFFSET_LEN = 8
263
+ I_FEATURES = 0x48
264
+ I_FEATURES_LEN = 8
265
+ I_FEATURES_DATAFILE_BIT = 3
266
+ I_FEATURES_MAX_BIT = 4
267
+
214
268
def __init__ (self , * a , ** k ):
215
269
super (QcowInspector , self ).__init__ (* a , ** k )
216
270
self .new_region ('header' , CaptureRegion (0 , 512 ))
@@ -220,6 +274,10 @@ def _qcow_header_data(self):
220
274
struct .unpack ('>4sIQIIQ' , self .region ('header' ).data [:32 ]))
221
275
return magic , size
222
276
277
@property
def has_header(self):
    """Return whether the 'header' capture region is complete."""
    header = self.region('header')
    return header.complete
280
+
223
281
@property
224
282
def virtual_size (self ):
225
283
if not self .region ('header' ).complete :
@@ -236,9 +294,77 @@ def format_match(self):
236
294
magic , size = self ._qcow_header_data ()
237
295
return magic == b'QFI\xFB '
238
296
297
@property
def has_backing_file(self):
    """Report whether the header's backing file offset is nonzero.

    :returns: None if the header region is not complete yet, False if
              the data does not match this format or the offset is zero,
              True if the offset is nonzero.
    """
    header = self.region('header')
    if not header.complete:
        return None
    if not self.format_match:
        return False

    start = self.BF_OFFSET
    stop = start + self.BF_OFFSET_LEN
    # Big-endian unsigned 64-bit value; nonzero means "has a backing file"
    (offset,) = struct.unpack('>Q', header.data[start:stop])
    return offset != 0
308
+
309
@property
def has_unknown_features(self):
    """Report whether unrecognized incompatible-feature bits are set.

    The first I_FEATURES_MAX_BIT bits of the bitfield (counting from the
    least-significant bit) are treated as known; any bit beyond them is
    unknown.

    :returns: None if the header region is not complete yet, False if
              the data does not match this format or only known bits are
              set, True (after logging a warning) if an unknown bit is
              set.
    """
    if not self.region('header').complete:
        return None
    if not self.format_match:
        return False
    i_features = self.region('header').data[
        self.I_FEATURES:self.I_FEATURES + self.I_FEATURES_LEN]

    # This is the maximum byte number we should expect any bits to be set
    max_byte = self.I_FEATURES_MAX_BIT // 8
    # Known bits inside that byte. The shift is reduced modulo 8 so the
    # mask always describes a single byte, even when the maximum known
    # bit lives beyond the first byte.
    max_byte_mask = (1 << (self.I_FEATURES_MAX_BIT % 8)) - 1

    # The flag bytes are in big-endian ordering, so if we process
    # them in index-order, they're reversed
    for i, byte_num in enumerate(reversed(range(self.I_FEATURES_LEN))):
        if byte_num == max_byte:
            # In the max-allowed byte, only bits below the maximum-known
            # feature flag bit may be set
            allow_mask = max_byte_mask
        elif byte_num > max_byte:
            # Above the byte with the maximum known feature flag bit we
            # expect all zeroes
            allow_mask = 0x0
        else:
            # Any earlier-than-the-maximum byte can have any of the flag
            # bits set
            allow_mask = 0xFF

        unknown_bits = i_features[i] & ~allow_mask
        if unknown_bits:
            # Report the bits of the byte that was actually tested
            LOG.warning('Found unknown feature bit in byte %i: %s/%s',
                        byte_num, bin(unknown_bits), bin(allow_mask))
            return True

    return False
344
+
345
@property
def has_data_file(self):
    """Report whether the external-data-file feature bit is set.

    I_FEATURES_DATAFILE_BIT is a one-based bit position within the
    big-endian incompatible-features bitfield (3 selects mask 0x04 of
    the last byte).

    :returns: None if the header region is not complete yet, False if
              the data does not match this format or the bit is clear,
              True if the bit is set.
    """
    if not self.region('header').complete:
        return None
    if not self.format_match:
        return False
    i_features = self.region('header').data[
        self.I_FEATURES:self.I_FEATURES + self.I_FEATURES_LEN]

    # Convert the one-based bit number to a zero-based index once, so the
    # byte and the mask are derived from the same value
    bit_index = self.I_FEATURES_DATAFILE_BIT - 1
    # The bitfield is big-endian, so the lowest bits live in the last byte
    byte = self.I_FEATURES_LEN - 1 - bit_index // 8
    bit = 1 << (bit_index % 8)
    return bool(i_features[byte] & bit)
359
+
239
360
def __str__(self):
    """Return the short string name of this file format."""
    format_name = 'qcow2'
    return format_name
241
362
363
def safety_check(self):
    """Check that the image uses no feature considered unsafe.

    The image is safe only when it is known to have no backing file, no
    external data file and no unknown feature bits. Each of those
    properties returns None while the header is still incomplete; that
    undetermined state is treated as unsafe so the check never passes on
    data that has not actually been inspected.

    :returns: True if safe, False otherwise.
    """
    for check in ('has_backing_file', 'has_data_file',
                  'has_unknown_features'):
        # Evaluated lazily, in order, so later properties (and their
        # logging) are skipped once one check has already failed
        result = getattr(self, check)
        if result is None or result:
            return False
    return True
367
+
242
368
243
369
# The VHD (or VPC as QEMU calls it) format consists of a big-endian
244
370
# 512-byte "footer" at the beginning of the file with various
0 commit comments