diff --git a/AUTHORS.rst b/AUTHORS.rst index a927260..2415139 100644 --- a/AUTHORS.rst +++ b/AUTHORS.rst @@ -9,4 +9,4 @@ Lead Contributors ------------ -.. * +.. * Alon Diamant `@advance512 <https://github.com/advance512>`_ diff --git a/CHANGELOG.rst b/CHANGELOG.rst index c827931..a788cb3 100644 --- a/CHANGELOG.rst +++ b/CHANGELOG.rst @@ -1,6 +1,19 @@ Changelog ========= +0.7.2 (2020-06-27) +------------------ + +* Added option for disabling verification by HTTP request of default /favicon.ico file, to have similar behavior + to tag-based favicon results. +* Fixed flaky test. + +0.7.1 (2020-06-17) +------------------ + +* Added support for receiving HTML input +* Fixed issue with dimensions(tag) not working for some websites + 0.7.0 (2019-08-31) ------------------ diff --git a/src/favicon/__init__.py b/src/favicon/__init__.py index fe777f5..999f642 100644 --- a/src/favicon/__init__.py +++ b/src/favicon/__init__.py @@ -8,7 +8,7 @@ __all__ = ["get", "Icon"] __title__ = "favicon" -__version__ = "0.7.0" +__version__ = "0.7.2" __author__ = "Scott Werner" __license__ = "MIT" __copyright__ = "Copyright 2019 Scott Werner" diff --git a/src/favicon/favicon.py b/src/favicon/favicon.py index 5730f91..0f183fb 100644 --- a/src/favicon/favicon.py +++ b/src/favicon/favicon.py @@ -40,12 +40,21 @@ Icon = namedtuple('Icon', ['url', 'width', 'height', 'format']) -def get(url, *args, **request_kwargs): +def get(url, *args, html_override=None, verify_default_icon=True, **request_kwargs): """Get all fav icons for a url. :param url: Homepage. :type url: str + :param html_override: HTML input, as string. Will be used instead of an HTTP response from + the `url`. + :type html_override: str or None + + :param verify_default_icon: Whether to verify the existence of the default + https://www.domain.com/favicon.ico file, or to return it as a probable result which + still requires verification, like the other results. + :type verify_default_icon: bool + :param request_kwargs: Request headers argument. 
:type request_kwargs: Dict @@ -62,28 +71,38 @@ def get(url, *args, **request_kwargs): request_kwargs.setdefault('headers', HEADERS) request_kwargs.setdefault('allow_redirects', True) - response = requests.get(url, **request_kwargs) - response.raise_for_status() + if html_override is None: + response = requests.get(url, **request_kwargs) + response.raise_for_status() + final_url = response.url + html_override = response.text + else: + final_url = url icons = set() - default_icon = default(response.url, **request_kwargs) + default_icon = default(final_url, verify_default_icon, **request_kwargs) if default_icon: icons.add(default_icon) - link_icons = tags(response.url, response.text) + link_icons = tags(final_url, html_override) if link_icons: icons.update(link_icons) return sorted(icons, key=lambda i: i.width + i.height, reverse=True) -def default(url, **request_kwargs): +def default(url, verify_default_icon, **request_kwargs): """Get icon using default filename favicon.ico. :param url: Url for site. :type url: str + :param verify_default_icon: Whether to verify the existence of the default + https://www.domain.com/favicon.ico file, or to return it as a probable result + which still requires verification, like the other results. + :type verify_default_icon: bool + :param request_kwargs: Request headers argument. 
:type request_kwargs: Dict @@ -92,6 +111,9 @@ def default(url, **request_kwargs): """ parsed = urlparse(url) favicon_url = urlunparse((parsed.scheme, parsed.netloc, 'favicon.ico', '', '', '')) + if not verify_default_icon: + return Icon(favicon_url, 0, 0, 'ico') + response = requests.head(favicon_url, **request_kwargs) if response.status_code == 200: return Icon(response.url, 0, 0, 'ico') @@ -182,7 +204,7 @@ def dimensions(tag): if sizes and sizes != 'any': size = sizes.split(' ') # '16x16 32x32 64x64' size.sort(reverse=True) - width, height = re.split(r'[x\xd7]', size[0]) + width, height = re.split(r'[x\xd7/]', size[0]) else: filename = tag.get('href') or tag.get('content') size = SIZE_RE.search(filename) diff --git a/tests/test_favicon.py b/tests/test_favicon.py index bc91e4e..7d3f8c7 100644 --- a/tests/test_favicon.py +++ b/tests/test_favicon.py @@ -201,3 +201,17 @@ def test_request_kwargs(m): ) def test_is_absolute_helper(url, expected): assert is_absolute(url) == expected + + +def test_html_input(): + # contents of mock.com + mock_com_html = ''' Home - MOCK.com Test''' # noqa + + icons = favicon.get( + 'http://mock.com/', + html_override=mock_com_html, + ) + assert icons + icons.sort(key=lambda icon: icon.url) + assert icons[0].url == 'http://mock.com/favicon.ico' + assert icons[1].url == 'http://mock.com/wp-content/uploads/2014/03/favicon.ico'