Skip to content

Commit 697291b

Browse files
author
wordofglass
committed
fetchart: improve error resilience of the Content-Type detection by file magic
1 parent 9968288 commit 697291b

File tree

2 files changed

+41
-35
lines changed

2 files changed

+41
-35
lines changed

beetsplug/fetchart.py

Lines changed: 39 additions & 34 deletions
Original file line numberDiff line numberDiff line change
@@ -238,42 +238,47 @@ def fetch_image(self, candidate, extra):
238238
# rely on it. Instead validate the type using the file magic
239239
# and only then determine the extension.
240240
data = resp.iter_content(chunk_size=1024)
241-
try:
242-
# stream only a small part of the image to get its header
243-
chunk = next(data)
244-
except StopIteration:
245-
pass
241+
header = b''
242+
for chunk in data:
243+
header += chunk
244+
if len(header) >= 32:
245+
# The imghdr module will only read 32 bytes, and our
246+
# own additions in mediafile read even fewer.
247+
break
246248
else:
247-
real_ct = _image_mime_type(chunk)
248-
if real_ct is None:
249-
# detection by file magic failed, fall back to the
250-
# server-supplied Content-Type
251-
# Is our type detection failsafe enough to drop this?
252-
real_ct = ct
253-
254-
if real_ct not in CONTENT_TYPES:
255-
self._log.debug(u'not a supported image: {}',
256-
real_ct or u'unknown content type')
257-
candidate.path = None
258-
return
259-
260-
ext = b'.' + CONTENT_TYPES[real_ct][0]
261-
if real_ct != ct:
262-
self._log.warn(u'Server specified {}, but returned a '
263-
u'{} image. Correcting the extension '
264-
u'to {}',
265-
ct, real_ct, ext)
266-
267-
with NamedTemporaryFile(suffix=ext, delete=False) as fh:
268-
# write the first already loaded part of the image
249+
# server didn't return enough data, i.e. the image is corrupt or truncated
250+
return
251+
252+
real_ct = _image_mime_type(header)
253+
if real_ct is None:
254+
# detection by file magic failed, fall back to the
255+
# server-supplied Content-Type
256+
# Is our type detection failsafe enough to drop this?
257+
real_ct = ct
258+
259+
if real_ct not in CONTENT_TYPES:
260+
self._log.debug(u'not a supported image: {}',
261+
real_ct or u'unknown content type')
262+
candidate.path = None
263+
return
264+
265+
ext = b'.' + CONTENT_TYPES[real_ct][0]
266+
267+
if real_ct != ct:
268+
self._log.warn(u'Server specified {}, but returned a '
269+
u'{} image. Correcting the extension '
270+
u'to {}',
271+
ct, real_ct, ext)
272+
273+
with NamedTemporaryFile(suffix=ext, delete=False) as fh:
274+
# write the first already loaded part of the image
275+
fh.write(header)
276+
# download the remaining part of the image
277+
for chunk in data:
269278
fh.write(chunk)
270-
# download the remaining part of the image
271-
for chunk in data:
272-
fh.write(chunk)
273-
self._log.debug(u'downloaded art to: {0}',
274-
util.displayable_path(fh.name))
275-
candidate.path = util.bytestring_path(fh.name)
276-
279+
self._log.debug(u'downloaded art to: {0}',
280+
util.displayable_path(fh.name))
281+
candidate.path = util.bytestring_path(fh.name)
277282
return
278283

279284
except (IOError, requests.RequestException, TypeError) as exc:

test/test_art.py

Lines changed: 2 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -60,7 +60,8 @@ def mock_response(self, url, content_type='image/jpeg', file_type=None):
6060
file_type = content_type
6161
responses.add(responses.GET, url,
6262
content_type=content_type,
63-
body=IMAGEHEADER.get(file_type, b'\x00' * 32))
63+
# imghdr reads 32 bytes
64+
body=IMAGEHEADER.get(file_type, b'').ljust(32, b'\x00'))
6465

6566

6667
class FetchImageTest(FetchImageHelper, UseThePlugin):

0 commit comments

Comments
 (0)