Skip to content

Commit 86ed8e9

Browse files
author
wordofglass
authored
Merge pull request #2072 from beetbox/art_rectify_extension_via_imghdr
Rectify artwork extension via imghdr
2 parents a1ed781 + 7bf6554 commit 86ed8e9

File tree

3 files changed

+75
-32
lines changed

3 files changed

+75
-32
lines changed

beetsplug/fetchart.py

Lines changed: 42 additions & 11 deletions
Original file line numberDiff line numberDiff line change
@@ -29,6 +29,7 @@
2929
from beets import ui
3030
from beets import util
3131
from beets import config
32+
from beets.mediafile import _image_mime_type
3233
from beets.util.artresizer import ArtResizer
3334
from beets.util import confit
3435
from beets.util import syspath, bytestring_path
@@ -230,18 +231,49 @@ def fetch_image(self, candidate, extra):
230231
with closing(self.request(candidate.url, stream=True,
231232
message=u'downloading image')) as resp:
232233
ct = resp.headers.get('Content-Type', None)
233-
if ct not in CONTENT_TYPES:
234-
self._log.debug(
235-
u'not a supported image: {}',
236-
resp.headers.get('Content-Type') or u'no content type',
237-
)
238-
candidate.path = None
234+
235+
# Download the image to a temporary file. As some servers
236+
# (notably fanart.tv) have proven to return wrong Content-Types
237+
# when images were uploaded with a bad file extension, do not
238+
# rely on it. Instead validate the type using the file magic
239+
# and only then determine the extension.
240+
data = resp.iter_content(chunk_size=1024)
241+
header = b''
242+
for chunk in data:
243+
header += chunk
244+
if len(header) >= 32:
245+
# The imghdr module will only read 32 bytes, and our
246+
# own additions in mediafile even less.
247+
break
248+
else:
249+
# server didn't return enough data, i.e. corrupt image
250+
return
251+
252+
real_ct = _image_mime_type(header)
253+
if real_ct is None:
254+
# detection by file magic failed, fall back to the
255+
# server-supplied Content-Type
256+
# Is our type detection failsafe enough to drop this?
257+
real_ct = ct
258+
259+
if real_ct not in CONTENT_TYPES:
260+
self._log.debug(u'not a supported image: {}',
261+
real_ct or u'unknown content type')
239262
return
240263

241-
# Generate a temporary file with the correct extension.
242-
with NamedTemporaryFile(suffix=b'.' + CONTENT_TYPES[ct][0],
243-
delete=False) as fh:
244-
for chunk in resp.iter_content(chunk_size=1024):
264+
ext = b'.' + CONTENT_TYPES[real_ct][0]
265+
266+
if real_ct != ct:
267+
self._log.warn(u'Server specified {}, but returned a '
268+
u'{} image. Correcting the extension '
269+
u'to {}',
270+
ct, real_ct, ext)
271+
272+
with NamedTemporaryFile(suffix=ext, delete=False) as fh:
273+
# write the first already loaded part of the image
274+
fh.write(header)
275+
# download the remaining part of the image
276+
for chunk in data:
245277
fh.write(chunk)
246278
self._log.debug(u'downloaded art to: {0}',
247279
util.displayable_path(fh.name))
@@ -252,7 +284,6 @@ def fetch_image(self, candidate, extra):
252284
# Handling TypeError works around a urllib3 bug:
253285
# https://github.com/shazow/urllib3/issues/556
254286
self._log.debug(u'error fetching art: {}', exc)
255-
candidate.path = None
256287
return
257288

258289

docs/changelog.rst

Lines changed: 3 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -40,9 +40,9 @@ Fixes:
4040
* Fix a crash when specifying non-ASCII format strings on the command line
4141
with the ``-f`` option to many commands. :bug:`2063`
4242
* :doc:`/plugins/fetchart`: Determine the file extension for downloaded images
43-
based on the ``Content-Type`` header instead of hardcoding it to .jpg.
44-
Reported in :bug:`2053`, which for now it does not fix due to
45-
server-side issues, though.
43+
based on the image's magic bytes and warn if result is not consistent with
44+
the server-supplied ``Content-Type`` header instead of
45+
hardcoding it to .jpg. :bug:`2053`
4646

4747
1.3.18 (May 31, 2016)
4848
---------------------

test/test_art.py

Lines changed: 30 additions & 18 deletions
Original file line numberDiff line numberDiff line change
@@ -45,16 +45,29 @@ def setUp(self):
4545
self.plugin = fetchart.FetchArtPlugin()
4646

4747

48-
class FetchImageTest(UseThePlugin):
49-
URL = 'http://example.com/test.jpg'
50-
48+
class FetchImageHelper(_common.TestCase):
49+
"""Helper mixin for mocking requests when fetching images
50+
with remote art sources.
51+
"""
5152
@responses.activate
5253
def run(self, *args, **kwargs):
53-
super(FetchImageTest, self).run(*args, **kwargs)
54+
super(FetchImageHelper, self).run(*args, **kwargs)
55+
56+
IMAGEHEADER = {'image/jpeg': b'\x00' * 6 + b'JFIF',
57+
'image/png': b'\211PNG\r\n\032\n', }
5458

55-
def mock_response(self, content_type):
56-
responses.add(responses.GET, self.URL,
57-
content_type=content_type)
59+
def mock_response(self, url, content_type='image/jpeg', file_type=None):
60+
if file_type is None:
61+
file_type = content_type
62+
responses.add(responses.GET, url,
63+
content_type=content_type,
64+
# imghdr reads 32 bytes
65+
body=self.IMAGEHEADER.get(
66+
file_type, b'').ljust(32, b'\x00'))
67+
68+
69+
class FetchImageTest(FetchImageHelper, UseThePlugin):
70+
URL = 'http://example.com/test.jpg'
5871

5972
def setUp(self):
6073
super(FetchImageTest, self).setUp()
@@ -64,17 +77,23 @@ def setUp(self):
6477
self.candidate = fetchart.Candidate(logger, url=self.URL)
6578

6679
def test_invalid_type_returns_none(self):
67-
self.mock_response('image/watercolour')
80+
self.mock_response(self.URL, 'image/watercolour')
6881
self.source.fetch_image(self.candidate, self.extra)
6982
self.assertEqual(self.candidate.path, None)
7083

7184
def test_jpeg_type_returns_path(self):
72-
self.mock_response('image/jpeg')
85+
self.mock_response(self.URL, 'image/jpeg')
7386
self.source.fetch_image(self.candidate, self.extra)
7487
self.assertNotEqual(self.candidate.path, None)
7588

7689
def test_extension_set_by_content_type(self):
77-
self.mock_response('image/png')
90+
self.mock_response(self.URL, 'image/png')
91+
self.source.fetch_image(self.candidate, self.extra)
92+
self.assertEqual(os.path.splitext(self.candidate.path)[1], b'.png')
93+
self.assertExists(self.candidate.path)
94+
95+
def test_does_not_rely_on_server_content_type(self):
96+
self.mock_response(self.URL, 'image/jpeg', 'image/png')
7897
self.source.fetch_image(self.candidate, self.extra)
7998
self.assertEqual(os.path.splitext(self.candidate.path)[1], b'.png')
8099
self.assertExists(self.candidate.path)
@@ -128,7 +147,7 @@ def test_precedence_amongst_correct_files(self):
128147
self.assertEqual(candidates, paths)
129148

130149

131-
class CombinedTest(UseThePlugin):
150+
class CombinedTest(FetchImageHelper, UseThePlugin):
132151
ASIN = 'xxxx'
133152
MBID = 'releaseid'
134153
AMAZON_URL = 'http://images.amazon.com/images/P/{0}.01.LZZZZZZZ.jpg' \
@@ -143,13 +162,6 @@ def setUp(self):
143162
self.dpath = os.path.join(self.temp_dir, b'arttest')
144163
os.mkdir(self.dpath)
145164

146-
@responses.activate
147-
def run(self, *args, **kwargs):
148-
super(CombinedTest, self).run(*args, **kwargs)
149-
150-
def mock_response(self, url, content_type='image/jpeg'):
151-
responses.add(responses.GET, url, content_type=content_type)
152-
153165
def test_main_interface_returns_amazon_art(self):
154166
self.mock_response(self.AMAZON_URL)
155167
album = _common.Bag(asin=self.ASIN)

0 commit comments

Comments
 (0)