From 13cc93866149a6cf665b619e8e7e5928356ad3b9 Mon Sep 17 00:00:00 2001
From: Mikhail Korobov <kmike84@gmail.com>
Date: Sun, 7 Aug 2022 00:44:33 +0500
Subject: [PATCH 1/5] switch from pytest-flake8 to raw flake8; clean up ignores

pytest-flake8 doesn't work with the latest flake8 release
---
 .flake8                | 13 +++++++++++++
 pytest.ini             | 17 -----------------
 tests/test_encoding.py |  5 +++--
 tox.ini                |  9 ++++++---
 w3lib/encoding.py      |  6 +++---
 w3lib/url.py           |  6 +++---
 6 files changed, 28 insertions(+), 28 deletions(-)
 create mode 100644 .flake8
diff --git a/.flake8 b/.flake8
new file mode 100644
index 00000000..5a66e33f
--- /dev/null
+++ b/.flake8
@@ -0,0 +1,13 @@
+[flake8]
+ignore =
+    # Refers to the max line length. Let's suppress the error and simply
+    # let black decide how it wants to format the lines.
+    E501,
+
+    # Refers to "line break before/after binary operator".
+    # Similar to above, let black take care of the formatting.
+    W503,
+    W504,
+
+    # It seems flake8 can misfire on it
+    E401, # multiple imports on one line
diff --git a/pytest.ini b/pytest.ini
index 68cfd625..515b2196 100644
--- a/pytest.ini
+++ b/pytest.ini
@@ -1,19 +1,2 @@
 [pytest]
 doctest_optionflags = ALLOW_UNICODE ALLOW_BYTES
-flake8-max-line-length = 88
-flake8-ignore =
-    W503  # https://www.flake8rules.com/rules/W503.html
-    E203  # https://www.flake8rules.com/rules/E203.html
-
-    docs/conf.py E121 E122 E265 E401
-    tests/test_encoding.py E128 E221 E241 E302 E401 E501 E731
-    tests/test_form.py E265
-    tests/test_html.py E123 E128 E241 E303 E501 E502
-    tests/test_http.py E128 E261 E302 W291
-    tests/test_url.py E126 E127 E128 E226 E261 E303 E501 W293 W391
-    w3lib/encoding.py E126 E128 E302 E305 E401 E501
-    w3lib/form.py E402 E501 E721
-    w3lib/html.py E128 E302 E501 E502 W504
-    w3lib/http.py E501
-    w3lib/url.py E128 E261 E302 E305 E501 F841 W291 W293 W504
-    w3lib/util.py E302
diff --git a/tests/test_encoding.py b/tests/test_encoding.py
index dfda2032..d251504a 100644
--- a/tests/test_encoding.py
+++ b/tests/test_encoding.py
@@ -40,7 +40,7 @@ def test_bom(self):
             bom_encoding, bom = read_bom(string)
             assert bom_encoding is not None
             assert bom is not None
-            decoded = string[len(bom) :].decode(bom_encoding)
+            decoded = string[len(bom):].decode(bom_encoding)
             self.assertEqual(water_unicode, decoded)
         # Body without BOM
         enc, bom = read_bom(b"foo")
@@ -280,7 +280,8 @@ def test_html_encoding(self):
         self._assert_encoding_detected(None, "utf-8", codecs.BOM_UTF8 + body)
 
     def test_autodetect(self):
-        asciif = lambda x: "ascii"
+        def asciif(x):
+            return "ascii"
         body = b"""<meta charset="utf-8">"""
         # body encoding takes precedence
         self._assert_encoding_detected(None, "utf-8", body, auto_detect_fun=asciif)
diff --git a/tox.ini b/tox.ini
index aae72b69..f565d0dd 100644
--- a/tox.ini
+++ b/tox.ini
@@ -34,10 +34,13 @@ commands =
 [testenv:flake8]
 basepython = python3
 deps =
-    {[testenv]deps}
-    pytest-flake8
+    flake8
 commands =
-    pytest --flake8
+    flake8 \
+    w3lib \
+    setup.py \
+    tests \
+    {posargs}
 
 [testenv:pylint]
 deps =
diff --git a/w3lib/encoding.py b/w3lib/encoding.py
index 86b678be..0db0dcf4 100644
--- a/w3lib/encoding.py
+++ b/w3lib/encoding.py
@@ -285,20 +285,20 @@ def html_to_unicode(
         # remove BOM if it agrees with the encoding
         if enc == bom_enc:
             bom = cast(bytes, bom)
-            html_body_str = html_body_str[len(bom) :]
+            html_body_str = html_body_str[len(bom):]
         elif enc == "utf-16" or enc == "utf-32":
             # read endianness from BOM, or default to big endian
             # tools.ietf.org/html/rfc2781 section 4.3
             if bom_enc is not None and bom_enc.startswith(enc):
                 enc = bom_enc
                 bom = cast(bytes, bom)
-                html_body_str = html_body_str[len(bom) :]
+                html_body_str = html_body_str[len(bom):]
             else:
                 enc += "-be"
         return enc, to_unicode(html_body_str, enc)
     if bom_enc is not None:
         bom = cast(bytes, bom)
-        return bom_enc, to_unicode(html_body_str[len(bom) :], bom_enc)
+        return bom_enc, to_unicode(html_body_str[len(bom):], bom_enc)
     enc = html_body_declared_encoding(html_body_str)
     if enc is None and (auto_detect_fun is not None):
         enc = auto_detect_fun(html_body_str)
diff --git a/w3lib/url.py b/w3lib/url.py
index 2464576e..6ca5e26c 100644
--- a/w3lib/url.py
+++ b/w3lib/url.py
@@ -41,7 +41,7 @@
 # error handling function for bytes-to-Unicode decoding errors with URLs
 def _quote_byte(error: UnicodeError) -> Tuple[str, int]:
     error = cast(AnyUnicodeError, error)
-    return (to_unicode(quote(error.object[error.start : error.end])), error.end)
+    return (to_unicode(quote(error.object[error.start: error.end])), error.end)
 
 
 codecs.register_error("percentencode", _quote_byte)
@@ -435,7 +435,7 @@ def parse_data_uri(uri: StrOrBytes) -> ParseDataURIResult:
     m = _mediatype_pattern.match(uri)
     if m:
         media_type = m.group().decode()
-        uri = uri[m.end() :]
+        uri = uri[m.end():]
     else:
         media_type_params["charset"] = "US-ASCII"
 
@@ -446,7 +446,7 @@ def parse_data_uri(uri: StrOrBytes) -> ParseDataURIResult:
             if value_quoted:
                 value = re.sub(rb"\\(.)", rb"\1", value_quoted)
             media_type_params[attribute.decode()] = value.decode()
-            uri = uri[m.end() :]
+            uri = uri[m.end():]
         else:
             break
 

From 9ed23182482948aa623010744140b20934a3094d Mon Sep 17 00:00:00 2001
From: Mikhail Korobov <kmike84@gmail.com>
Date: Sun, 7 Aug 2022 00:50:32 +0500
Subject: [PATCH 2/5] rerun black; silence flake8 when it disagrees with black

---
 .flake8                | 3 +++
 tests/test_encoding.py | 3 ++-
 tox.ini                | 2 +-
 w3lib/encoding.py      | 6 +++---
 w3lib/url.py           | 6 +++---
 5 files changed, 12 insertions(+), 8 deletions(-)

diff --git a/.flake8 b/.flake8
index 5a66e33f..1758508f 100644
--- a/.flake8
+++ b/.flake8
@@ -9,5 +9,8 @@ ignore =
     W503,
     W504,
 
+    # black disagrees with flake8, and inserts whitespace
+    E203,  # whitespace before ':'
+
     # It seems flake8 can misfire on it
     E401, # multiple imports on one line
diff --git a/tests/test_encoding.py b/tests/test_encoding.py
index d251504a..865cf720 100644
--- a/tests/test_encoding.py
+++ b/tests/test_encoding.py
@@ -40,7 +40,7 @@ def test_bom(self):
             bom_encoding, bom = read_bom(string)
             assert bom_encoding is not None
             assert bom is not None
-            decoded = string[len(bom):].decode(bom_encoding)
+            decoded = string[len(bom) :].decode(bom_encoding)
             self.assertEqual(water_unicode, decoded)
         # Body without BOM
         enc, bom = read_bom(b"foo")
@@ -282,6 +282,7 @@ def test_html_encoding(self):
     def test_autodetect(self):
         def asciif(x):
             return "ascii"
+
         body = b"""<meta charset="utf-8">"""
         # body encoding takes precedence
         self._assert_encoding_detected(None, "utf-8", body, auto_detect_fun=asciif)
diff --git a/tox.ini b/tox.ini
index f565d0dd..5f69ee53 100644
--- a/tox.ini
+++ b/tox.ini
@@ -51,7 +51,7 @@ commands =
 
 [testenv:black]
 deps =
-    black==22.3.0
+    black==22.6.0
 commands =
     black --check {posargs:conftest.py setup.py tests w3lib}
 
diff --git a/w3lib/encoding.py b/w3lib/encoding.py
index 0db0dcf4..86b678be 100644
--- a/w3lib/encoding.py
+++ b/w3lib/encoding.py
@@ -285,20 +285,20 @@ def html_to_unicode(
         # remove BOM if it agrees with the encoding
         if enc == bom_enc:
             bom = cast(bytes, bom)
-            html_body_str = html_body_str[len(bom):]
+            html_body_str = html_body_str[len(bom) :]
         elif enc == "utf-16" or enc == "utf-32":
             # read endianness from BOM, or default to big endian
             # tools.ietf.org/html/rfc2781 section 4.3
             if bom_enc is not None and bom_enc.startswith(enc):
                 enc = bom_enc
                 bom = cast(bytes, bom)
-                html_body_str = html_body_str[len(bom):]
+                html_body_str = html_body_str[len(bom) :]
             else:
                 enc += "-be"
         return enc, to_unicode(html_body_str, enc)
     if bom_enc is not None:
         bom = cast(bytes, bom)
-        return bom_enc, to_unicode(html_body_str[len(bom):], bom_enc)
+        return bom_enc, to_unicode(html_body_str[len(bom) :], bom_enc)
     enc = html_body_declared_encoding(html_body_str)
     if enc is None and (auto_detect_fun is not None):
         enc = auto_detect_fun(html_body_str)
diff --git a/w3lib/url.py b/w3lib/url.py
index 6ca5e26c..2464576e 100644
--- a/w3lib/url.py
+++ b/w3lib/url.py
@@ -41,7 +41,7 @@
 # error handling function for bytes-to-Unicode decoding errors with URLs
 def _quote_byte(error: UnicodeError) -> Tuple[str, int]:
     error = cast(AnyUnicodeError, error)
-    return (to_unicode(quote(error.object[error.start: error.end])), error.end)
+    return (to_unicode(quote(error.object[error.start : error.end])), error.end)
 
 
 codecs.register_error("percentencode", _quote_byte)
@@ -435,7 +435,7 @@ def parse_data_uri(uri: StrOrBytes) -> ParseDataURIResult:
     m = _mediatype_pattern.match(uri)
     if m:
         media_type = m.group().decode()
-        uri = uri[m.end():]
+        uri = uri[m.end() :]
     else:
         media_type_params["charset"] = "US-ASCII"
 
@@ -446,7 +446,7 @@ def parse_data_uri(uri: StrOrBytes) -> ParseDataURIResult:
             if value_quoted:
                 value = re.sub(rb"\\(.)", rb"\1", value_quoted)
             media_type_params[attribute.decode()] = value.decode()
-            uri = uri[m.end():]
+            uri = uri[m.end() :]
         else:
             break
 

From 5859d57fc3a03d4c3eb5a3f5e5d21b797f3eb2d9 Mon Sep 17 00:00:00 2001
From: Mikhail Korobov <kmike84@gmail.com>
Date: Sun, 7 Aug 2022 00:53:10 +0500
Subject: [PATCH 3/5] bump mypy version, just in case

---
 tox.ini | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/tox.ini b/tox.ini
index 5f69ee53..632de000 100644
--- a/tox.ini
+++ b/tox.ini
@@ -27,7 +27,7 @@ basepython = python3
 deps =
     # mypy would error if pytest (or its sub) not found
     pytest
-    mypy==0.910
+    mypy==0.971
 commands =
     mypy --show-error-codes {posargs: w3lib tests}
 

From ec5d84c32a96b955353691972439cdccf949ee38 Mon Sep 17 00:00:00 2001
From: Mikhail Korobov <kmike84@gmail.com>
Date: Mon, 8 Aug 2022 15:22:00 +0500
Subject: [PATCH 4/5] Fix flake8 E401

---
 .flake8           | 3 ---
 w3lib/encoding.py | 5 ++++-
 2 files changed, 4 insertions(+), 4 deletions(-)

diff --git a/.flake8 b/.flake8
index 1758508f..268fd3a8 100644
--- a/.flake8
+++ b/.flake8
@@ -11,6 +11,3 @@ ignore =
 
     # black disagrees with flake8, and inserts whitespace
     E203,  # whitespace before ':'
-
-    # It seems flake8 can misfire on it
-    E401, # multiple imports on one line
diff --git a/w3lib/encoding.py b/w3lib/encoding.py
index 86b678be..db5a2b25 100644
--- a/w3lib/encoding.py
+++ b/w3lib/encoding.py
@@ -1,8 +1,11 @@
 """
 Functions for handling encoding of web pages
 """
-import re, codecs, encodings
+import re
+import codecs
+import encodings
 from typing import Callable, Match, Optional, Tuple, Union, cast
+
 from w3lib._types import AnyUnicodeError, StrOrBytes
 import w3lib.util
 

From c394ec40d63abfd7ea9dc785965804addb65c3f8 Mon Sep 17 00:00:00 2001
From: Mikhail Korobov <kmike84@gmail.com>
Date: Mon, 8 Aug 2022 15:23:24 +0500
Subject: [PATCH 5/5] move flake8 arguments to posargs in tox.ini

---
 tox.ini | 5 +----
 1 file changed, 1 insertion(+), 4 deletions(-)

diff --git a/tox.ini b/tox.ini
index 632de000..5647f5a1 100644
--- a/tox.ini
+++ b/tox.ini
@@ -37,10 +37,7 @@ deps =
     flake8
 commands =
     flake8 \
-    w3lib \
-    setup.py \
-    tests \
-    {posargs}
+    {posargs:w3lib tests setup.py}
 
 [testenv:pylint]
 deps =