From 3e25ba2323cd1c815819d022d9f01d44e4920ef2 Mon Sep 17 00:00:00 2001 From: Alon Diamant Date: Wed, 17 Jun 2020 23:10:57 +0300 Subject: [PATCH 1/3] Added support for receiving HTML input Fixed issue with dimensions(tag) not working for some websites --- CHANGELOG.rst | 6 ++++++ src/favicon/__init__.py | 2 +- src/favicon/favicon.py | 20 ++++++++++++++------ tests/test_favicon.py | 13 +++++++++++++ 4 files changed, 34 insertions(+), 7 deletions(-) diff --git a/CHANGELOG.rst b/CHANGELOG.rst index c827931..d04a92b 100644 --- a/CHANGELOG.rst +++ b/CHANGELOG.rst @@ -1,6 +1,12 @@ Changelog ========= +0.7.1 (2020-06-17) +------------------ + +* Added support for receiving HTML input +* Fixed issue with dimensions(tag) not working for some websites + 0.7.0 (2019-08-31) ------------------ diff --git a/src/favicon/__init__.py b/src/favicon/__init__.py index fe777f5..e78c4a5 100644 --- a/src/favicon/__init__.py +++ b/src/favicon/__init__.py @@ -8,7 +8,7 @@ __all__ = ["get", "Icon"] __title__ = "favicon" -__version__ = "0.7.0" +__version__ = "0.7.1" __author__ = "Scott Werner" __license__ = "MIT" __copyright__ = "Copyright 2019 Scott Werner" diff --git a/src/favicon/favicon.py b/src/favicon/favicon.py index 5730f91..55f1d69 100644 --- a/src/favicon/favicon.py +++ b/src/favicon/favicon.py @@ -40,12 +40,14 @@ Icon = namedtuple('Icon', ['url', 'width', 'height', 'format']) -def get(url, *args, **request_kwargs): +def get(url, *args, html=None, **request_kwargs): """Get all fav icons for a url. :param url: Homepage. :type url: str + :param html: HTML input, as string. Will be used instead of an HTTP response from the `url`. + :param request_kwargs: Request headers argument. :type request_kwargs: Dict @@ -62,16 +64,21 @@ def get(url, *args, **request_kwargs): request_kwargs.setdefault('headers', HEADERS) request_kwargs.setdefault('allow_redirects', True) - response = requests.get(url, **request_kwargs) - response.raise_for_status() + if html is None: + response = requests.get(url, **request_kwargs) + response.raise_for_status() + final_url = response.url + html = response.text + else: + final_url = url icons = set() - default_icon = default(response.url, **request_kwargs) + default_icon = default(final_url, **request_kwargs) if default_icon: icons.add(default_icon) - link_icons = tags(response.url, response.text) + link_icons = tags(final_url, html) if link_icons: icons.update(link_icons) @@ -182,7 +189,7 @@ def dimensions(tag): if sizes and sizes != 'any': size = sizes.split(' ') # '16x16 32x32 64x64' size.sort(reverse=True) - width, height = re.split(r'[x\xd7]', size[0]) + width, height = re.split(r'[x\xd7/]', size[0]) else: filename = tag.get('href') or tag.get('content') size = SIZE_RE.search(filename) @@ -195,3 +202,4 @@ def dimensions(tag): width = ''.join(c for c in width if c.isdigit()) height = ''.join(c for c in height if c.isdigit()) return int(width), int(height) + diff --git a/tests/test_favicon.py b/tests/test_favicon.py index bc91e4e..02662cf 100644 --- a/tests/test_favicon.py +++ b/tests/test_favicon.py @@ -201,3 +201,16 @@ def test_request_kwargs(m): ) def test_is_absolute_helper(url, expected): assert is_absolute(url) == expected + +def test_html_input(): + # contents of mock.com + mock_com_html = ''' Home - MOCK.com

© 2020 MOCK.com

''' # noqa + + icons = favicon.get( + 'http://mock.com/', + html=mock_com_html, + ) + assert icons + assert icons[0].url == 'http://mock.com/favicon.ico' + assert icons[1].url == 'http://mock.com/wp-content/uploads/2014/03/favicon.ico' + print(icons) \ No newline at end of file From 66382c7938d90b53c35f3aafe089e2d5cedf52b6 Mon Sep 17 00:00:00 2001 From: Alon Diamant Date: Thu, 18 Jun 2020 12:27:50 +0300 Subject: [PATCH 2/3] Change: html parameter renamed to html_override Change: test corrected and minimized --- src/favicon/favicon.py | 11 ++++++----- tests/test_favicon.py | 5 ++--- 2 files changed, 8 insertions(+), 8 deletions(-) diff --git a/src/favicon/favicon.py b/src/favicon/favicon.py index 55f1d69..ced8c34 100644 --- a/src/favicon/favicon.py +++ b/src/favicon/favicon.py @@ -40,13 +40,14 @@ Icon = namedtuple('Icon', ['url', 'width', 'height', 'format']) -def get(url, *args, html=None, **request_kwargs): +def get(url, *args, html_override=None, **request_kwargs): """Get all fav icons for a url. :param url: Homepage. :type url: str - :param html: HTML input, as string. Will be used instead of an HTTP response from the `url`. + :param html_override: HTML input, as string. Will be used instead of an HTTP response from the `url`. + :type html_override: str or None :param request_kwargs: Request headers argument. :type request_kwargs: Dict @@ -64,11 +65,11 @@ def get(url, *args, html=None, **request_kwargs): request_kwargs.setdefault('headers', HEADERS) request_kwargs.setdefault('allow_redirects', True) - if html is None: + if html_override is None: response = requests.get(url, **request_kwargs) response.raise_for_status() final_url = response.url - html = response.text + html_override = response.text else: final_url = url @@ -78,7 +79,7 @@ def get(url, *args, html=None, **request_kwargs): if default_icon: icons.add(default_icon) - link_icons = tags(final_url, html) + link_icons = tags(final_url, html_override) if link_icons: icons.update(link_icons) diff --git a/tests/test_favicon.py b/tests/test_favicon.py index 02662cf..8b32cc1 100644 --- a/tests/test_favicon.py +++ b/tests/test_favicon.py @@ -204,13 +204,12 @@ def test_is_absolute_helper(url, expected): def test_html_input(): # contents of mock.com - mock_com_html = ''' Home - MOCK.com ''' # noqa + mock_com_html = ''' Home - MOCK.com Test''' # noqa icons = favicon.get( 'http://mock.com/', - html=mock_com_html, + html_override=mock_com_html, ) assert icons assert icons[0].url == 'http://mock.com/favicon.ico' assert icons[1].url == 'http://mock.com/wp-content/uploads/2014/03/favicon.ico' - print(icons) \ No newline at end of file From 13bc8ce3b96a1a8b64c0b8b68ba3caa519bf3108 Mon Sep 17 00:00:00 2001 From: Alon Diamant Date: Sat, 27 Jun 2020 17:46:31 +0300 Subject: [PATCH 3/3] * Added option for disabling verification by HTTP request of default /favicon.ico file, to have similar behavior to tag-based favicon results. * Fixed flaky test. --- AUTHORS.rst | 2 +- CHANGELOG.rst | 7 +++++++ src/favicon/__init__.py | 2 +- src/favicon/favicon.py | 23 ++++++++++++++++++----- tests/test_favicon.py | 2 ++ 5 files changed, 29 insertions(+), 7 deletions(-) diff --git a/AUTHORS.rst b/AUTHORS.rst index a927260..2415139 100644 --- a/AUTHORS.rst +++ b/AUTHORS.rst @@ -9,4 +9,4 @@ Lead Contributors ------------ -.. * +.. * Alon Diamant `@advance512 `_ diff --git a/CHANGELOG.rst b/CHANGELOG.rst index d04a92b..a788cb3 100644 --- a/CHANGELOG.rst +++ b/CHANGELOG.rst @@ -1,6 +1,13 @@ Changelog ========= +0.7.2 (2020-06-27) +------------------ + +* Added option for disabling verification by HTTP request of default /favicon.ico file, to have similar behavior + to tag-based favicon results. +* Fixed flaky test. + 0.7.1 (2020-06-17) ------------------ diff --git a/src/favicon/__init__.py b/src/favicon/__init__.py index e78c4a5..999f642 100644 --- a/src/favicon/__init__.py +++ b/src/favicon/__init__.py @@ -8,7 +8,7 @@ __all__ = ["get", "Icon"] __title__ = "favicon" -__version__ = "0.7.1" +__version__ = "0.7.2" __author__ = "Scott Werner" __license__ = "MIT" __copyright__ = "Copyright 2019 Scott Werner" diff --git a/src/favicon/favicon.py b/src/favicon/favicon.py index ced8c34..0f183fb 100644 --- a/src/favicon/favicon.py +++ b/src/favicon/favicon.py @@ -40,15 +40,21 @@ Icon = namedtuple('Icon', ['url', 'width', 'height', 'format']) -def get(url, *args, html_override=None, **request_kwargs): +def get(url, *args, html_override=None, verify_default_icon=True, **request_kwargs): """Get all fav icons for a url. :param url: Homepage. :type url: str - :param html_override: HTML input, as string. Will be used instead of an HTTP response from the `url`. + :param html_override: HTML input, as string. Will be used instead of an HTTP response from + the `url`. :type html_override: str or None + :param verify_default_icon: Whether to verify the existence of the default + https://www.domain.com/favicon.ico file, or to return it as a probable result which + still requires verification, like the other results. + :type verify_default_icon: bool + :param request_kwargs: Request headers argument. :type request_kwargs: Dict @@ -75,7 +81,7 @@ def get(url, *args, html_override=None, **request_kwargs): icons = set() - default_icon = default(final_url, **request_kwargs) + default_icon = default(final_url, verify_default_icon, **request_kwargs) if default_icon: icons.add(default_icon) @@ -86,12 +92,17 @@ def get(url, *args, html_override=None, **request_kwargs): return sorted(icons, key=lambda i: i.width + i.height, reverse=True) -def default(url, **request_kwargs): +def default(url, verify_default_icon, **request_kwargs): """Get icon using default filename favicon.ico. :param url: Url for site. :type url: str + :param verify_default_icon: Whether to verify the existence of the default + https://www.domain.com/favicon.ico file, or to return it as a probable result + which still requires verification, like the other results. + :type verify_default_icon: bool + :param request_kwargs: Request headers argument. :type request_kwargs: Dict @@ -100,6 +111,9 @@ def default(url, **request_kwargs): """ parsed = urlparse(url) favicon_url = urlunparse((parsed.scheme, parsed.netloc, 'favicon.ico', '', '', '')) + if not verify_default_icon: + return Icon(favicon_url, 0, 0, 'ico') + response = requests.head(favicon_url, **request_kwargs) if response.status_code == 200: return Icon(response.url, 0, 0, 'ico') @@ -203,4 +217,3 @@ def dimensions(tag): width = ''.join(c for c in width if c.isdigit()) height = ''.join(c for c in height if c.isdigit()) return int(width), int(height) - diff --git a/tests/test_favicon.py b/tests/test_favicon.py index 8b32cc1..7d3f8c7 100644 --- a/tests/test_favicon.py +++ b/tests/test_favicon.py @@ -202,6 +202,7 @@ def test_request_kwargs(m): def test_is_absolute_helper(url, expected): assert is_absolute(url) == expected + def test_html_input(): # contents of mock.com mock_com_html = ''' Home - MOCK.com Test''' # noqa @@ -211,5 +212,6 @@ def test_html_input(): html_override=mock_com_html, ) assert icons + icons.sort(key=lambda icon: icon.url) assert icons[0].url == 'http://mock.com/favicon.ico' assert icons[1].url == 'http://mock.com/wp-content/uploads/2014/03/favicon.ico'