From 13cc93866149a6cf665b619e8e7e5928356ad3b9 Mon Sep 17 00:00:00 2001 From: Mikhail Korobov Date: Sun, 7 Aug 2022 00:44:33 +0500 Subject: [PATCH 1/5] switch from pytest-flake8 to raw flake8; clean up ignores pytest-flake8 doesn't work with latest flake8 --- .flake8 | 13 +++++++++++++ pytest.ini | 17 ----------------- tests/test_encoding.py | 5 +++-- tox.ini | 9 ++++++--- w3lib/encoding.py | 6 +++--- w3lib/url.py | 6 +++--- 6 files changed, 28 insertions(+), 28 deletions(-) create mode 100644 .flake8 diff --git a/.flake8 b/.flake8 new file mode 100644 index 00000000..5a66e33f --- /dev/null +++ b/.flake8 @@ -0,0 +1,13 @@ +[flake8] +ignore = + # Refers to the max-line length. Let's suppress the error and simply + # let black take care on how it wants to format the lines. + E501, + + # Refers to "line break before/after binary operator". + # Similar to above, let black take care of the formatting. + W503, + W504, + + # It seems flake8 can misfire on it + E401, # multiple imports on one line diff --git a/pytest.ini b/pytest.ini index 68cfd625..515b2196 100644 --- a/pytest.ini +++ b/pytest.ini @@ -1,19 +1,2 @@ [pytest] doctest_optionflags = ALLOW_UNICODE ALLOW_BYTES -flake8-max-line-length = 88 -flake8-ignore = - W503 # https://www.flake8rules.com/rules/W503.html - E203 # https://www.flake8rules.com/rules/E203.html - - docs/conf.py E121 E122 E265 E401 - tests/test_encoding.py E128 E221 E241 E302 E401 E501 E731 - tests/test_form.py E265 - tests/test_html.py E123 E128 E241 E303 E501 E502 - tests/test_http.py E128 E261 E302 W291 - tests/test_url.py E126 E127 E128 E226 E261 E303 E501 W293 W391 - w3lib/encoding.py E126 E128 E302 E305 E401 E501 - w3lib/form.py E402 E501 E721 - w3lib/html.py E128 E302 E501 E502 W504 - w3lib/http.py E501 - w3lib/url.py E128 E261 E302 E305 E501 F841 W291 W293 W504 - w3lib/util.py E302 diff --git a/tests/test_encoding.py b/tests/test_encoding.py index dfda2032..d251504a 100644 --- a/tests/test_encoding.py +++ b/tests/test_encoding.py @@ -40,7 
+40,7 @@ def test_bom(self): bom_encoding, bom = read_bom(string) assert bom_encoding is not None assert bom is not None - decoded = string[len(bom) :].decode(bom_encoding) + decoded = string[len(bom):].decode(bom_encoding) self.assertEqual(water_unicode, decoded) # Body without BOM enc, bom = read_bom(b"foo") @@ -280,7 +280,8 @@ def test_html_encoding(self): self._assert_encoding_detected(None, "utf-8", codecs.BOM_UTF8 + body) def test_autodetect(self): - asciif = lambda x: "ascii" + def asciif(x): + return "ascii" body = b"""""" # body encoding takes precedence self._assert_encoding_detected(None, "utf-8", body, auto_detect_fun=asciif) diff --git a/tox.ini b/tox.ini index aae72b69..f565d0dd 100644 --- a/tox.ini +++ b/tox.ini @@ -34,10 +34,13 @@ commands = [testenv:flake8] basepython = python3 deps = - {[testenv]deps} - pytest-flake8 + flake8 commands = - pytest --flake8 + flake8 \ + w3lib \ + setup.py \ + tests \ + {posargs} [testenv:pylint] deps = diff --git a/w3lib/encoding.py b/w3lib/encoding.py index 86b678be..0db0dcf4 100644 --- a/w3lib/encoding.py +++ b/w3lib/encoding.py @@ -285,20 +285,20 @@ def html_to_unicode( # remove BOM if it agrees with the encoding if enc == bom_enc: bom = cast(bytes, bom) - html_body_str = html_body_str[len(bom) :] + html_body_str = html_body_str[len(bom):] elif enc == "utf-16" or enc == "utf-32": # read endianness from BOM, or default to big endian # tools.ietf.org/html/rfc2781 section 4.3 if bom_enc is not None and bom_enc.startswith(enc): enc = bom_enc bom = cast(bytes, bom) - html_body_str = html_body_str[len(bom) :] + html_body_str = html_body_str[len(bom):] else: enc += "-be" return enc, to_unicode(html_body_str, enc) if bom_enc is not None: bom = cast(bytes, bom) - return bom_enc, to_unicode(html_body_str[len(bom) :], bom_enc) + return bom_enc, to_unicode(html_body_str[len(bom):], bom_enc) enc = html_body_declared_encoding(html_body_str) if enc is None and (auto_detect_fun is not None): enc = auto_detect_fun(html_body_str) 
diff --git a/w3lib/url.py b/w3lib/url.py index 2464576e..6ca5e26c 100644 --- a/w3lib/url.py +++ b/w3lib/url.py @@ -41,7 +41,7 @@ # error handling function for bytes-to-Unicode decoding errors with URLs def _quote_byte(error: UnicodeError) -> Tuple[str, int]: error = cast(AnyUnicodeError, error) - return (to_unicode(quote(error.object[error.start : error.end])), error.end) + return (to_unicode(quote(error.object[error.start: error.end])), error.end) codecs.register_error("percentencode", _quote_byte) @@ -435,7 +435,7 @@ def parse_data_uri(uri: StrOrBytes) -> ParseDataURIResult: m = _mediatype_pattern.match(uri) if m: media_type = m.group().decode() - uri = uri[m.end() :] + uri = uri[m.end():] else: media_type_params["charset"] = "US-ASCII" @@ -446,7 +446,7 @@ def parse_data_uri(uri: StrOrBytes) -> ParseDataURIResult: if value_quoted: value = re.sub(rb"\\(.)", rb"\1", value_quoted) media_type_params[attribute.decode()] = value.decode() - uri = uri[m.end() :] + uri = uri[m.end():] else: break From 9ed23182482948aa623010744140b20934a3094d Mon Sep 17 00:00:00 2001 From: Mikhail Korobov Date: Sun, 7 Aug 2022 00:50:32 +0500 Subject: [PATCH 2/5] rerun black; silence flake8 when it disagrees with black --- .flake8 | 3 +++ tests/test_encoding.py | 3 ++- tox.ini | 2 +- w3lib/encoding.py | 6 +++--- w3lib/url.py | 6 +++--- 5 files changed, 12 insertions(+), 8 deletions(-) diff --git a/.flake8 b/.flake8 index 5a66e33f..1758508f 100644 --- a/.flake8 +++ b/.flake8 @@ -9,5 +9,8 @@ ignore = W503, W504, + # black disagrees with flake8, and inserts whitespace + E203, # whitespace before ':' + # It seems flake8 can misfire on it E401, # multiple imports on one line diff --git a/tests/test_encoding.py b/tests/test_encoding.py index d251504a..865cf720 100644 --- a/tests/test_encoding.py +++ b/tests/test_encoding.py @@ -40,7 +40,7 @@ def test_bom(self): bom_encoding, bom = read_bom(string) assert bom_encoding is not None assert bom is not None - decoded = 
string[len(bom):].decode(bom_encoding) + decoded = string[len(bom) :].decode(bom_encoding) self.assertEqual(water_unicode, decoded) # Body without BOM enc, bom = read_bom(b"foo") @@ -282,6 +282,7 @@ def test_html_encoding(self): def test_autodetect(self): def asciif(x): return "ascii" + body = b"""""" # body encoding takes precedence self._assert_encoding_detected(None, "utf-8", body, auto_detect_fun=asciif) diff --git a/tox.ini b/tox.ini index f565d0dd..5f69ee53 100644 --- a/tox.ini +++ b/tox.ini @@ -51,7 +51,7 @@ commands = [testenv:black] deps = - black==22.3.0 + black==22.6.0 commands = black --check {posargs:conftest.py setup.py tests w3lib} diff --git a/w3lib/encoding.py b/w3lib/encoding.py index 0db0dcf4..86b678be 100644 --- a/w3lib/encoding.py +++ b/w3lib/encoding.py @@ -285,20 +285,20 @@ def html_to_unicode( # remove BOM if it agrees with the encoding if enc == bom_enc: bom = cast(bytes, bom) - html_body_str = html_body_str[len(bom):] + html_body_str = html_body_str[len(bom) :] elif enc == "utf-16" or enc == "utf-32": # read endianness from BOM, or default to big endian # tools.ietf.org/html/rfc2781 section 4.3 if bom_enc is not None and bom_enc.startswith(enc): enc = bom_enc bom = cast(bytes, bom) - html_body_str = html_body_str[len(bom):] + html_body_str = html_body_str[len(bom) :] else: enc += "-be" return enc, to_unicode(html_body_str, enc) if bom_enc is not None: bom = cast(bytes, bom) - return bom_enc, to_unicode(html_body_str[len(bom):], bom_enc) + return bom_enc, to_unicode(html_body_str[len(bom) :], bom_enc) enc = html_body_declared_encoding(html_body_str) if enc is None and (auto_detect_fun is not None): enc = auto_detect_fun(html_body_str) diff --git a/w3lib/url.py b/w3lib/url.py index 6ca5e26c..2464576e 100644 --- a/w3lib/url.py +++ b/w3lib/url.py @@ -41,7 +41,7 @@ # error handling function for bytes-to-Unicode decoding errors with URLs def _quote_byte(error: UnicodeError) -> Tuple[str, int]: error = cast(AnyUnicodeError, error) - return 
(to_unicode(quote(error.object[error.start: error.end])), error.end) + return (to_unicode(quote(error.object[error.start : error.end])), error.end) codecs.register_error("percentencode", _quote_byte) @@ -435,7 +435,7 @@ def parse_data_uri(uri: StrOrBytes) -> ParseDataURIResult: m = _mediatype_pattern.match(uri) if m: media_type = m.group().decode() - uri = uri[m.end():] + uri = uri[m.end() :] else: media_type_params["charset"] = "US-ASCII" @@ -446,7 +446,7 @@ def parse_data_uri(uri: StrOrBytes) -> ParseDataURIResult: if value_quoted: value = re.sub(rb"\\(.)", rb"\1", value_quoted) media_type_params[attribute.decode()] = value.decode() - uri = uri[m.end():] + uri = uri[m.end() :] else: break From 5859d57fc3a03d4c3eb5a3f5e5d21b797f3eb2d9 Mon Sep 17 00:00:00 2001 From: Mikhail Korobov Date: Sun, 7 Aug 2022 00:53:10 +0500 Subject: [PATCH 3/5] bump mypy version, just in case --- tox.ini | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/tox.ini b/tox.ini index 5f69ee53..632de000 100644 --- a/tox.ini +++ b/tox.ini @@ -27,7 +27,7 @@ basepython = python3 deps = # mypy would error if pytest (or its sub) not found pytest - mypy==0.910 + mypy==0.971 commands = mypy --show-error-codes {posargs: w3lib tests} From ec5d84c32a96b955353691972439cdccf949ee38 Mon Sep 17 00:00:00 2001 From: Mikhail Korobov Date: Mon, 8 Aug 2022 15:22:00 +0500 Subject: [PATCH 4/5] Fix flake E401 --- .flake8 | 3 --- w3lib/encoding.py | 5 ++++- 2 files changed, 4 insertions(+), 4 deletions(-) diff --git a/.flake8 b/.flake8 index 1758508f..268fd3a8 100644 --- a/.flake8 +++ b/.flake8 @@ -11,6 +11,3 @@ ignore = # black disagrees with flake8, and inserts whitespace E203, # whitespace before ':' - - # It seems flake8 can misfire on it - E401, # multiple imports on one line diff --git a/w3lib/encoding.py b/w3lib/encoding.py index 86b678be..db5a2b25 100644 --- a/w3lib/encoding.py +++ b/w3lib/encoding.py @@ -1,8 +1,11 @@ """ Functions for handling encoding of web pages """ -import re, codecs, 
encodings +import re +import codecs +import encodings from typing import Callable, Match, Optional, Tuple, Union, cast + from w3lib._types import AnyUnicodeError, StrOrBytes import w3lib.util From c394ec40d63abfd7ea9dc785965804addb65c3f8 Mon Sep 17 00:00:00 2001 From: Mikhail Korobov Date: Mon, 8 Aug 2022 15:23:24 +0500 Subject: [PATCH 5/5] move flake8 arguments to posargs in tox.ini --- tox.ini | 5 +---- 1 file changed, 1 insertion(+), 4 deletions(-) diff --git a/tox.ini b/tox.ini index 632de000..5647f5a1 100644 --- a/tox.ini +++ b/tox.ini @@ -37,10 +37,7 @@ deps = flake8 commands = flake8 \ - w3lib \ - setup.py \ - tests \ - {posargs} + {posargs:w3lib tests setup.py} [testenv:pylint] deps =