Skip to content

Commit c6d5a68

Browse files
kk7dsmarkgoddard
authored andcommitted
Extend format_inspector for QCOW safety
This adds two properties to the QcowInspector that makes it able to indicate whether the file specifies a backing_file or data_file in the header. Both conditions are considered unsafe for our usage. To ease checking of this condition, a classmethod is added that takes a local filename and digests just enough of the file to assert that both conditions are false. Change-Id: Iaf86b525397d41bd116999cabe0954a0a7efac65 Related-Bug: #2059809 (cherry picked from commit ae536bb394793c9a7a219cb498e03d5c81dbbbb7) (cherry picked from commit 2eba54e0821106097dfeceb424e53943fd090483) (cherry picked from commit 89dbbc838d606f461087e1494d19ddbcf9db0a38)
1 parent 26c7b35 commit c6d5a68

File tree

2 files changed

+214
-5
lines changed

2 files changed

+214
-5
lines changed

glance/common/format_inspector.py

Lines changed: 129 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -28,6 +28,14 @@
2828
LOG = logging.getLogger(__name__)
2929

3030

31+
def chunked_reader(fileobj, chunk_size=512):
32+
while True:
33+
chunk = fileobj.read(chunk_size)
34+
if not chunk:
35+
break
36+
yield chunk
37+
38+
3139
class CaptureRegion(object):
3240
"""Represents a region of a file we want to capture.
3341
@@ -176,10 +184,16 @@ def virtual_size(self):
176184
@property
177185
def actual_size(self):
178186
"""Returns the total size of the file, usually smaller than
179-
virtual_size.
187+
virtual_size. NOTE: this will only be accurate if the entire
188+
file is read and processed.
180189
"""
181190
return self._total_count
182191

192+
@property
193+
def complete(self):
194+
"""Returns True if we have all the information needed."""
195+
return all(r.complete for r in self._capture_regions.values())
196+
183197
def __str__(self):
184198
"""The string name of this file format."""
185199
return 'raw'
@@ -194,6 +208,35 @@ def context_info(self):
194208
return {name: len(region.data) for name, region in
195209
self._capture_regions.items()}
196210

211+
@classmethod
212+
def from_file(cls, filename):
213+
"""Read as much of a file as necessary to complete inspection.
214+
215+
NOTE: Because we only read as much of the file as necessary, the
216+
actual_size property will not reflect the size of the file, but the
217+
amount of data we read before we satisfied the inspector.
218+
219+
Raises ImageFormatError if we cannot parse the file.
220+
"""
221+
inspector = cls()
222+
with open(filename, 'rb') as f:
223+
for chunk in chunked_reader(f):
224+
inspector.eat_chunk(chunk)
225+
if inspector.complete:
226+
# No need to eat any more data
227+
break
228+
if not inspector.complete or not inspector.format_match:
229+
raise ImageFormatError('File is not in requested format')
230+
return inspector
231+
232+
def safety_check(self):
233+
"""Perform some checks to determine if this file is safe.
234+
235+
Returns True if safe, False otherwise. It may raise ImageFormatError
236+
if safety cannot be guaranteed because of parsing or other errors.
237+
"""
238+
return True
239+
197240

198241
# The qcow2 format consists of a big-endian 72-byte header, of which
199242
# only a small portion has information we care about:
@@ -202,15 +245,26 @@ def context_info(self):
202245
# 0 0x00 Magic 4-bytes 'QFI\xfb'
203246
# 4 0x04 Version (uint32_t, should always be 2 for modern files)
204247
# . . .
248+
# 8 0x08 Backing file offset (uint64_t)
205249
# 24 0x18 Size in bytes (unint64_t)
250+
# . . .
251+
# 72 0x48 Incompatible features bitfield (6 bytes)
206252
#
207-
# https://people.gnome.org/~markmc/qcow-image-format.html
253+
# https://gitlab.com/qemu-project/qemu/-/blob/master/docs/interop/qcow2.txt
208254
class QcowInspector(FileInspector):
209255
"""QEMU QCOW2 Format
210256
211257
This should only require about 32 bytes of the beginning of the file
212-
to determine the virtual size.
258+
to determine the virtual size, and 104 bytes to perform the safety check.
213259
"""
260+
261+
BF_OFFSET = 0x08
262+
BF_OFFSET_LEN = 8
263+
I_FEATURES = 0x48
264+
I_FEATURES_LEN = 8
265+
I_FEATURES_DATAFILE_BIT = 3
266+
I_FEATURES_MAX_BIT = 4
267+
214268
def __init__(self, *a, **k):
215269
super(QcowInspector, self).__init__(*a, **k)
216270
self.new_region('header', CaptureRegion(0, 512))
@@ -220,6 +274,10 @@ def _qcow_header_data(self):
220274
struct.unpack('>4sIQIIQ', self.region('header').data[:32]))
221275
return magic, size
222276

277+
@property
278+
def has_header(self):
279+
return self.region('header').complete
280+
223281
@property
224282
def virtual_size(self):
225283
if not self.region('header').complete:
@@ -236,9 +294,77 @@ def format_match(self):
236294
magic, size = self._qcow_header_data()
237295
return magic == b'QFI\xFB'
238296

297+
@property
298+
def has_backing_file(self):
299+
if not self.region('header').complete:
300+
return None
301+
if not self.format_match:
302+
return False
303+
bf_offset_bytes = self.region('header').data[
304+
self.BF_OFFSET:self.BF_OFFSET + self.BF_OFFSET_LEN]
305+
# nonzero means "has a backing file"
306+
bf_offset, = struct.unpack('>Q', bf_offset_bytes)
307+
return bf_offset != 0
308+
309+
@property
310+
def has_unknown_features(self):
311+
if not self.region('header').complete:
312+
return None
313+
if not self.format_match:
314+
return False
315+
i_features = self.region('header').data[
316+
self.I_FEATURES:self.I_FEATURES + self.I_FEATURES_LEN]
317+
318+
# This is the maximum byte number we should expect any bits to be set
319+
max_byte = self.I_FEATURES_MAX_BIT // 8
320+
321+
# The flag bytes are in big-endian ordering, so if we process
322+
# them in index-order, they're reversed
323+
for i, byte_num in enumerate(reversed(range(self.I_FEATURES_LEN))):
324+
if byte_num == max_byte:
325+
# If we're in the max-allowed byte, allow any bits less than
326+
# the maximum-known feature flag bit to be set
327+
allow_mask = ((1 << self.I_FEATURES_MAX_BIT) - 1)
328+
elif byte_num > max_byte:
329+
# If we're above the byte with the maximum known feature flag
330+
# bit, then we expect all zeroes
331+
allow_mask = 0x0
332+
else:
333+
# Any earlier-than-the-maximum byte can have any of the flag
334+
# bits set
335+
allow_mask = 0xFF
336+
337+
if i_features[i] & ~allow_mask:
338+
LOG.warning('Found unknown feature bit in byte %i: %s/%s',
339+
byte_num, bin(i_features[byte_num] & ~allow_mask),
340+
bin(allow_mask))
341+
return True
342+
343+
return False
344+
345+
@property
346+
def has_data_file(self):
347+
if not self.region('header').complete:
348+
return None
349+
if not self.format_match:
350+
return False
351+
i_features = self.region('header').data[
352+
self.I_FEATURES:self.I_FEATURES + self.I_FEATURES_LEN]
353+
354+
# First byte of bitfield, which is i_features[7]
355+
byte = self.I_FEATURES_LEN - 1 - self.I_FEATURES_DATAFILE_BIT // 8
356+
# Third bit of bitfield, which is 0x04
357+
bit = 1 << (self.I_FEATURES_DATAFILE_BIT - 1 % 8)
358+
return bool(i_features[byte] & bit)
359+
239360
def __str__(self):
240361
return 'qcow2'
241362

363+
def safety_check(self):
364+
return (not self.has_backing_file and
365+
not self.has_data_file and
366+
not self.has_unknown_features)
367+
242368

243369
# The VHD (or VPC as QEMU calls it) format consists of a big-endian
244370
# 512-byte "footer" at the beginning of the file with various

glance/tests/unit/common/test_format_inspector.py

Lines changed: 85 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -50,12 +50,29 @@ def tearDown(self):
5050
except Exception:
5151
pass
5252

53-
def _create_img(self, fmt, size):
53+
def _create_img(self, fmt, size, subformat=None, options=None,
54+
backing_file=None):
5455
if fmt == 'vhd':
5556
# QEMU calls the vhd format vpc
5657
fmt = 'vpc'
5758

58-
fn = tempfile.mktemp(prefix='glance-unittest-formatinspector-',
59+
if options is None:
60+
options = {}
61+
opt = ''
62+
prefix = 'glance-unittest-formatinspector-'
63+
64+
if subformat:
65+
options['subformat'] = subformat
66+
prefix += subformat + '-'
67+
68+
if options:
69+
opt += '-o ' + ','.join('%s=%s' % (k, v)
70+
for k, v in options.items())
71+
72+
if backing_file is not None:
73+
opt += ' -b %s -F raw' % backing_file
74+
75+
fn = tempfile.mktemp(prefix=prefix,
5976
suffix='.%s' % fmt)
6077
self._created_files.append(fn)
6178
subprocess.check_output(
@@ -119,6 +136,72 @@ def test_vhdx(self):
119136
def test_vmdk(self):
120137
self._test_format('vmdk')
121138

139+
def test_from_file_reads_minimum(self):
140+
img = self._create_img('qcow2', 10 * units.Mi)
141+
file_size = os.stat(img).st_size
142+
fmt = format_inspector.QcowInspector.from_file(img)
143+
# We know everything we need from the first 512 bytes of a QCOW image,
144+
# so make sure that we did not read the whole thing when we inspect
145+
# a local file.
146+
self.assertLess(fmt.actual_size, file_size)
147+
148+
def test_qcow2_safety_checks(self):
149+
# Create backing and data-file names (and initialize the backing file)
150+
backing_fn = tempfile.mktemp(prefix='backing')
151+
self._created_files.append(backing_fn)
152+
with open(backing_fn, 'w') as f:
153+
f.write('foobar')
154+
data_fn = tempfile.mktemp(prefix='data')
155+
self._created_files.append(data_fn)
156+
157+
# A qcow with no backing or data file is safe
158+
fn = self._create_img('qcow2', 5 * units.Mi, None)
159+
inspector = format_inspector.QcowInspector.from_file(fn)
160+
self.assertTrue(inspector.safety_check())
161+
162+
# A backing file makes it unsafe
163+
fn = self._create_img('qcow2', 5 * units.Mi, None,
164+
backing_file=backing_fn)
165+
inspector = format_inspector.QcowInspector.from_file(fn)
166+
self.assertFalse(inspector.safety_check())
167+
168+
# A data-file makes it unsafe
169+
fn = self._create_img('qcow2', 5 * units.Mi,
170+
options={'data_file': data_fn,
171+
'data_file_raw': 'on'})
172+
inspector = format_inspector.QcowInspector.from_file(fn)
173+
self.assertFalse(inspector.safety_check())
174+
175+
# Trying to load a non-QCOW file is an error
176+
self.assertRaises(format_inspector.ImageFormatError,
177+
format_inspector.QcowInspector.from_file,
178+
backing_fn)
179+
180+
def test_qcow2_feature_flag_checks(self):
181+
data = bytearray(512)
182+
data[0:4] = b'QFI\xFB'
183+
inspector = format_inspector.QcowInspector()
184+
inspector.region('header').data = data
185+
186+
# All zeros, no feature flags - all good
187+
self.assertFalse(inspector.has_unknown_features)
188+
189+
# A feature flag set in the first byte (highest-order) is not
190+
# something we know about, so fail.
191+
data[0x48] = 0x01
192+
self.assertTrue(inspector.has_unknown_features)
193+
194+
# The first bit in the last byte (lowest-order) is known (the dirty
195+
# bit) so that should pass
196+
data[0x48] = 0x00
197+
data[0x4F] = 0x01
198+
self.assertFalse(inspector.has_unknown_features)
199+
200+
# Currently (as of 2024), the high-order feature flag bit in the low-
201+
# order byte is not assigned, so make sure we reject it.
202+
data[0x4F] = 0x80
203+
self.assertTrue(inspector.has_unknown_features)
204+
122205
def test_vdi(self):
123206
self._test_format('vdi')
124207

0 commit comments

Comments
 (0)