Skip to content

Commit a28d96b

Browse files
kk7dselajkat
authored andcommitted
Extend format_inspector for QCOW safety
This adds two properties to the QcowInspector that makes it able to indicate whether the file specifies a backing_file or data_file in the header. Both conditions are considered unsafe for our usage. To ease checking of this condition, a classmethod is added that takes a local filename and digests just enough of the file to assert that both conditions are false. Note for yoga backport:With older qemu installed one of the qemu-img create commands fails, let's skip it from unmaintained/yoga and below that. Change-Id: Iaf86b525397d41bd116999cabe0954a0a7efac65 Related-Bug: #2059809 (cherry picked from commit ae536bb394793c9a7a219cb498e03d5c81dbbbb7) (cherry picked from commit 2eba54e0821106097dfeceb424e53943fd090483) (cherry picked from commit 89dbbc838d606f461087e1494d19ddbcf9db0a38) (cherry picked from commit 4860024) (cherry picked from commit f32d5b8)
1 parent f7f7249 commit a28d96b

File tree

2 files changed

+209
-5
lines changed

2 files changed

+209
-5
lines changed

glance/common/format_inspector.py

Lines changed: 129 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -28,6 +28,14 @@
2828
LOG = logging.getLogger(__name__)
2929

3030

31+
def chunked_reader(fileobj, chunk_size=512):
32+
while True:
33+
chunk = fileobj.read(chunk_size)
34+
if not chunk:
35+
break
36+
yield chunk
37+
38+
3139
class CaptureRegion(object):
3240
"""Represents a region of a file we want to capture.
3341
@@ -176,10 +184,16 @@ def virtual_size(self):
176184
@property
177185
def actual_size(self):
178186
"""Returns the total size of the file, usually smaller than
179-
virtual_size.
187+
virtual_size. NOTE: this will only be accurate if the entire
188+
file is read and processed.
180189
"""
181190
return self._total_count
182191

192+
@property
193+
def complete(self):
194+
"""Returns True if we have all the information needed."""
195+
return all(r.complete for r in self._capture_regions.values())
196+
183197
def __str__(self):
184198
"""The string name of this file format."""
185199
return 'raw'
@@ -194,6 +208,35 @@ def context_info(self):
194208
return {name: len(region.data) for name, region in
195209
self._capture_regions.items()}
196210

211+
@classmethod
212+
def from_file(cls, filename):
213+
"""Read as much of a file as necessary to complete inspection.
214+
215+
NOTE: Because we only read as much of the file as necessary, the
216+
actual_size property will not reflect the size of the file, but the
217+
amount of data we read before we satisfied the inspector.
218+
219+
Raises ImageFormatError if we cannot parse the file.
220+
"""
221+
inspector = cls()
222+
with open(filename, 'rb') as f:
223+
for chunk in chunked_reader(f):
224+
inspector.eat_chunk(chunk)
225+
if inspector.complete:
226+
# No need to eat any more data
227+
break
228+
if not inspector.complete or not inspector.format_match:
229+
raise ImageFormatError('File is not in requested format')
230+
return inspector
231+
232+
def safety_check(self):
233+
"""Perform some checks to determine if this file is safe.
234+
235+
Returns True if safe, False otherwise. It may raise ImageFormatError
236+
if safety cannot be guaranteed because of parsing or other errors.
237+
"""
238+
return True
239+
197240

198241
# The qcow2 format consists of a big-endian 72-byte header, of which
199242
# only a small portion has information we care about:
@@ -202,15 +245,26 @@ def context_info(self):
202245
# 0 0x00 Magic 4-bytes 'QFI\xfb'
203246
# 4 0x04 Version (uint32_t, should always be 2 for modern files)
204247
# . . .
248+
# 8 0x08 Backing file offset (uint64_t)
205249
# 24 0x18 Size in bytes (unint64_t)
250+
# . . .
251+
# 72 0x48 Incompatible features bitfield (6 bytes)
206252
#
207-
# https://people.gnome.org/~markmc/qcow-image-format.html
253+
# https://gitlab.com/qemu-project/qemu/-/blob/master/docs/interop/qcow2.txt
208254
class QcowInspector(FileInspector):
209255
"""QEMU QCOW2 Format
210256
211257
This should only require about 32 bytes of the beginning of the file
212-
to determine the virtual size.
258+
to determine the virtual size, and 104 bytes to perform the safety check.
213259
"""
260+
261+
BF_OFFSET = 0x08
262+
BF_OFFSET_LEN = 8
263+
I_FEATURES = 0x48
264+
I_FEATURES_LEN = 8
265+
I_FEATURES_DATAFILE_BIT = 3
266+
I_FEATURES_MAX_BIT = 4
267+
214268
def __init__(self, *a, **k):
215269
super(QcowInspector, self).__init__(*a, **k)
216270
self.new_region('header', CaptureRegion(0, 512))
@@ -220,6 +274,10 @@ def _qcow_header_data(self):
220274
struct.unpack('>4sIQIIQ', self.region('header').data[:32]))
221275
return magic, size
222276

277+
@property
278+
def has_header(self):
279+
return self.region('header').complete
280+
223281
@property
224282
def virtual_size(self):
225283
if not self.region('header').complete:
@@ -236,9 +294,77 @@ def format_match(self):
236294
magic, size = self._qcow_header_data()
237295
return magic == b'QFI\xFB'
238296

297+
@property
298+
def has_backing_file(self):
299+
if not self.region('header').complete:
300+
return None
301+
if not self.format_match:
302+
return False
303+
bf_offset_bytes = self.region('header').data[
304+
self.BF_OFFSET:self.BF_OFFSET + self.BF_OFFSET_LEN]
305+
# nonzero means "has a backing file"
306+
bf_offset, = struct.unpack('>Q', bf_offset_bytes)
307+
return bf_offset != 0
308+
309+
@property
310+
def has_unknown_features(self):
311+
if not self.region('header').complete:
312+
return None
313+
if not self.format_match:
314+
return False
315+
i_features = self.region('header').data[
316+
self.I_FEATURES:self.I_FEATURES + self.I_FEATURES_LEN]
317+
318+
# This is the maximum byte number we should expect any bits to be set
319+
max_byte = self.I_FEATURES_MAX_BIT // 8
320+
321+
# The flag bytes are in big-endian ordering, so if we process
322+
# them in index-order, they're reversed
323+
for i, byte_num in enumerate(reversed(range(self.I_FEATURES_LEN))):
324+
if byte_num == max_byte:
325+
# If we're in the max-allowed byte, allow any bits less than
326+
# the maximum-known feature flag bit to be set
327+
allow_mask = ((1 << self.I_FEATURES_MAX_BIT) - 1)
328+
elif byte_num > max_byte:
329+
# If we're above the byte with the maximum known feature flag
330+
# bit, then we expect all zeroes
331+
allow_mask = 0x0
332+
else:
333+
# Any earlier-than-the-maximum byte can have any of the flag
334+
# bits set
335+
allow_mask = 0xFF
336+
337+
if i_features[i] & ~allow_mask:
338+
LOG.warning('Found unknown feature bit in byte %i: %s/%s',
339+
byte_num, bin(i_features[byte_num] & ~allow_mask),
340+
bin(allow_mask))
341+
return True
342+
343+
return False
344+
345+
@property
346+
def has_data_file(self):
347+
if not self.region('header').complete:
348+
return None
349+
if not self.format_match:
350+
return False
351+
i_features = self.region('header').data[
352+
self.I_FEATURES:self.I_FEATURES + self.I_FEATURES_LEN]
353+
354+
# First byte of bitfield, which is i_features[7]
355+
byte = self.I_FEATURES_LEN - 1 - self.I_FEATURES_DATAFILE_BIT // 8
356+
# Third bit of bitfield, which is 0x04
357+
bit = 1 << (self.I_FEATURES_DATAFILE_BIT - 1 % 8)
358+
return bool(i_features[byte] & bit)
359+
239360
def __str__(self):
240361
return 'qcow2'
241362

363+
def safety_check(self):
364+
return (not self.has_backing_file and
365+
not self.has_data_file and
366+
not self.has_unknown_features)
367+
242368

243369
# The VHD (or VPC as QEMU calls it) format consists of a big-endian
244370
# 512-byte "footer" at the beginning of the file with various

glance/tests/unit/common/test_format_inspector.py

Lines changed: 80 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -51,18 +51,28 @@ def tearDown(self):
5151
except Exception:
5252
pass
5353

54-
def _create_img(self, fmt, size, subformat=None):
54+
def _create_img(self, fmt, size, subformat=None, options=None,
55+
backing_file=None):
5556
if fmt == 'vhd':
5657
# QEMU calls the vhd format vpc
5758
fmt = 'vpc'
5859

60+
if options is None:
61+
options = {}
5962
opt = ''
6063
prefix = 'glance-unittest-formatinspector-'
6164

6265
if subformat:
63-
opt = ' -o subformat=%s' % subformat
66+
options['subformat'] = subformat
6467
prefix += subformat + '-'
6568

69+
if options:
70+
opt += '-o ' + ','.join('%s=%s' % (k, v)
71+
for k, v in options.items())
72+
73+
if backing_file is not None:
74+
opt += ' -b %s -F raw' % backing_file
75+
6676
fn = tempfile.mktemp(prefix=prefix,
6777
suffix='.%s' % fmt)
6878
self._created_files.append(fn)
@@ -160,6 +170,15 @@ def test_vmdk(self):
160170
def test_vmdk_stream_optimized(self):
161171
self._test_format('vmdk', 'streamOptimized')
162172

173+
def test_from_file_reads_minimum(self):
174+
img = self._create_img('qcow2', 10 * units.Mi)
175+
file_size = os.stat(img).st_size
176+
fmt = format_inspector.QcowInspector.from_file(img)
177+
# We know everything we need from the first 512 bytes of a QCOW image,
178+
# so make sure that we did not read the whole thing when we inspect
179+
# a local file.
180+
self.assertLess(fmt.actual_size, file_size)
181+
163182
def _test_vmdk_bad_descriptor_offset(self, subformat=None):
164183
format_name = 'vmdk'
165184
image_size = 10 * units.Mi
@@ -231,6 +250,65 @@ def test_vmdk_bad_descriptor_mem_limit(self):
231250
def test_vmdk_bad_descriptor_mem_limit_stream_optimized(self):
232251
self._test_vmdk_bad_descriptor_mem_limit(subformat='streamOptimized')
233252

253+
def test_qcow2_safety_checks(self):
254+
# Create backing and data-file names (and initialize the backing file)
255+
backing_fn = tempfile.mktemp(prefix='backing')
256+
self._created_files.append(backing_fn)
257+
with open(backing_fn, 'w') as f:
258+
f.write('foobar')
259+
data_fn = tempfile.mktemp(prefix='data')
260+
self._created_files.append(data_fn)
261+
262+
# A qcow with no backing or data file is safe
263+
fn = self._create_img('qcow2', 5 * units.Mi, None)
264+
inspector = format_inspector.QcowInspector.from_file(fn)
265+
self.assertTrue(inspector.safety_check())
266+
267+
# A backing file makes it unsafe
268+
fn = self._create_img('qcow2', 5 * units.Mi, None,
269+
backing_file=backing_fn)
270+
inspector = format_inspector.QcowInspector.from_file(fn)
271+
self.assertFalse(inspector.safety_check())
272+
273+
# Note(lajoskatona): This image create fails on bionic due to
274+
# old qemu-img utilities, let's skip this only test from yoga
275+
# A data-file makes it unsafe
276+
# fn = self._create_img('qcow2', 5 * units.Mi,
277+
# options={'data_file': data_fn,
278+
# 'data_file_raw': 'on'})
279+
# inspector = format_inspector.QcowInspector.from_file(fn)
280+
# self.assertFalse(inspector.safety_check())
281+
282+
# Trying to load a non-QCOW file is an error
283+
self.assertRaises(format_inspector.ImageFormatError,
284+
format_inspector.QcowInspector.from_file,
285+
backing_fn)
286+
287+
def test_qcow2_feature_flag_checks(self):
288+
data = bytearray(512)
289+
data[0:4] = b'QFI\xFB'
290+
inspector = format_inspector.QcowInspector()
291+
inspector.region('header').data = data
292+
293+
# All zeros, no feature flags - all good
294+
self.assertFalse(inspector.has_unknown_features)
295+
296+
# A feature flag set in the first byte (highest-order) is not
297+
# something we know about, so fail.
298+
data[0x48] = 0x01
299+
self.assertTrue(inspector.has_unknown_features)
300+
301+
# The first bit in the last byte (lowest-order) is known (the dirty
302+
# bit) so that should pass
303+
data[0x48] = 0x00
304+
data[0x4F] = 0x01
305+
self.assertFalse(inspector.has_unknown_features)
306+
307+
# Currently (as of 2024), the high-order feature flag bit in the low-
308+
# order byte is not assigned, so make sure we reject it.
309+
data[0x4F] = 0x80
310+
self.assertTrue(inspector.has_unknown_features)
311+
234312
def test_vdi(self):
235313
self._test_format('vdi')
236314

0 commit comments

Comments
 (0)