Commit c1893e9

Merge pull request #206 from scrapy/improve-typing

Full typing

2 parents: be369f1 + a709f38
File tree: 7 files changed, +60 -34 lines

    tests/test_encoding.py
    tests/test_url.py
    tox.ini
    w3lib/encoding.py
    w3lib/html.py
    w3lib/http.py
    w3lib/url.py
tests/test_encoding.py

Lines changed: 16 additions & 5 deletions

@@ -1,5 +1,6 @@
 import codecs
 import unittest
+from typing import Optional, Union, List, Any

 from w3lib.encoding import (
     html_body_declared_encoding,
@@ -121,11 +122,11 @@ def test_invalid_utf8(self):
         self.assertEqual(to_unicode(b"\xc2\xc2\xa3", "utf-8"), "\ufffd\xa3")


-def ct(charset):
+def ct(charset: Optional[str]) -> Optional[str]:
     return "Content-Type: text/html; charset=" + charset if charset else None


-def norm_encoding(enc):
+def norm_encoding(enc: str) -> str:
     return codecs.lookup(enc).name


@@ -138,7 +139,13 @@ def test_unicode_body(self):
         self.assertTrue(isinstance(body_unicode, str))
         self.assertEqual(body_unicode, unicode_string)

-    def _assert_encoding(self, content_type, body, expected_encoding, expected_unicode):
+    def _assert_encoding(
+        self,
+        content_type: Optional[str],
+        body: bytes,
+        expected_encoding: str,
+        expected_unicode: Union[str, List[str]],
+    ) -> None:
         assert not isinstance(body, str)
         encoding, body_unicode = html_to_unicode(ct(content_type), body)
         self.assertTrue(isinstance(body_unicode, str))
@@ -210,8 +217,12 @@ def test_replace_wrong_encoding(self):
         assert "<span>value</span>" in body_unicode, repr(body_unicode)

     def _assert_encoding_detected(
-        self, content_type, expected_encoding, body, **kwargs
-    ):
+        self,
+        content_type: Optional[str],
+        expected_encoding: str,
+        body: bytes,
+        **kwargs: Any,
+    ) -> None:
         assert not isinstance(body, str)
         encoding, body_unicode = html_to_unicode(ct(content_type), body, **kwargs)
         self.assertTrue(isinstance(body_unicode, str))

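A side note on the annotations above: giving ct an Optional[str] parameter and return type lets mypy track the None case at call sites. A minimal sketch of that effect (hypothetical usage, not part of the diff):

from typing import Optional

def ct(charset: Optional[str]) -> Optional[str]:
    return "Content-Type: text/html; charset=" + charset if charset else None

header = ct(None)       # inferred as Optional[str]
# header.lower()        # mypy: Item "None" of "Optional[str]" has no attribute "lower"
print(ct("utf-8"))      # Content-Type: text/html; charset=utf-8
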
tests/test_url.py

Lines changed: 32 additions & 19 deletions

@@ -1,5 +1,7 @@
 import os
 import unittest
+from inspect import isclass
+from typing import Optional, Union, Type, Callable, Tuple, List
 from urllib.parse import urlparse

 import pytest
@@ -10,6 +12,7 @@
     _ASCII_TAB_OR_NEWLINE,
     _C0_CONTROL_OR_SPACE,
 )
+from w3lib._types import StrOrBytes
 from w3lib._url import _SPECIAL_SCHEMES
 from w3lib.url import (
     add_or_replace_parameter,
@@ -27,17 +30,16 @@
     url_query_cleaner,
 )

-
-UNSET = object()
-
 # Test cases for URL-to-safe-URL conversions with a URL and an encoding as
 # input parameters.
 #
 # (encoding, input URL, output URL or exception)
-SAFE_URL_ENCODING_CASES = (
-    (UNSET, "", ValueError),
-    (UNSET, "https://example.com", "https://example.com"),
-    (UNSET, "https://example.com/©", "https://example.com/%C2%A9"),
+SAFE_URL_ENCODING_CASES: List[
+    Tuple[Optional[str], StrOrBytes, Union[str, Type[Exception]]]
+] = [
+    (None, "", ValueError),
+    (None, "https://example.com", "https://example.com"),
+    (None, "https://example.com/©", "https://example.com/%C2%A9"),
     # Paths are always UTF-8-encoded.
     ("iso-8859-1", "https://example.com/©", "https://example.com/%C2%A9"),
     # Queries are UTF-8-encoded if the scheme is not special, ws or wss.
@@ -53,7 +55,7 @@
     ),
     # Fragments are always UTF-8-encoded.
     ("iso-8859-1", "https://example.com#©", "https://example.com#%C2%A9"),
-)
+]

 INVALID_SCHEME_FOLLOW_UPS = "".join(
     chr(value)
@@ -315,15 +317,17 @@
 )


-def _test_safe_url_func(url, *, encoding=UNSET, output, func):
+def _test_safe_url_func(
+    url: StrOrBytes,
+    *,
+    encoding: Optional[str] = None,
+    output: Union[str, Type[Exception]],
+    func: Callable[..., str],
+) -> None:
     kwargs = {}
-    if encoding is not UNSET:
+    if encoding is not None:
         kwargs["encoding"] = encoding
-    try:
-        is_exception = issubclass(output, Exception)
-    except TypeError:
-        is_exception = False
-    if is_exception:
+    if isclass(output) and issubclass(output, Exception):
         with pytest.raises(output):
             func(url, **kwargs)
         return
@@ -332,7 +336,12 @@ def _test_safe_url_func(url, *, encoding=UNSET, output, func):
     assert func(actual, **kwargs) == output  # Idempotency


-def _test_safe_url_string(url, *, encoding=UNSET, output):
+def _test_safe_url_string(
+    url: StrOrBytes,
+    *,
+    encoding: Optional[str] = None,
+    output: Union[str, Type[Exception]],
+) -> None:
     return _test_safe_url_func(
         url,
         encoding=encoding,
@@ -342,7 +351,7 @@ def _test_safe_url_string(url, *, encoding=UNSET, output):


 KNOWN_SAFE_URL_STRING_ENCODING_ISSUES = {
-    (UNSET, ""),  # Invalid URL
+    (None, ""),  # Invalid URL
     # UTF-8 encoding is not enforced in non-special URLs, or in URLs with the
     # ws or wss schemas.
     ("iso-8859-1", "a://example.com?\xa9"),
@@ -362,7 +371,9 @@ def _test_safe_url_string(url, *, encoding=UNSET, output):
         for case in SAFE_URL_ENCODING_CASES
     ),
 )
-def test_safe_url_string_encoding(encoding, url, output):
+def test_safe_url_string_encoding(
+    encoding: Optional[str], url: StrOrBytes, output: Union[str, Type[Exception]]
+) -> None:
     _test_safe_url_string(url, encoding=encoding, output=output)


@@ -421,7 +432,9 @@ def test_safe_url_string_encoding(encoding, url, output):
         for case in SAFE_URL_URL_CASES
     ),
 )
-def test_safe_url_string_url(url, output):
+def test_safe_url_string_url(
+    url: StrOrBytes, output: Union[str, Type[Exception]]
+) -> None:
     _test_safe_url_string(url, output=output)

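Two details worth noting in _test_safe_url_func: the UNSET sentinel is replaced by None, which is simpler to express as Optional[str], and the try/except around issubclass() gives way to an isclass() guard, which avoids the TypeError for non-class values and is easier for a strict type checker to follow. A rough standalone sketch of the pattern (illustrative only, not code from the repository):

from inspect import isclass
from typing import Type, Union

def expects_exception(output: Union[str, Type[Exception]]) -> bool:
    # issubclass() raises TypeError for non-class arguments, so check
    # isclass() first instead of wrapping the call in try/except.
    return isclass(output) and issubclass(output, Exception)

print(expects_exception(ValueError))             # True
print(expects_exception("https://example.com"))  # False
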
tox.ini

Lines changed: 2 additions & 2 deletions

@@ -27,9 +27,9 @@ basepython = python3
 deps =
     # mypy would error if pytest (or its sub) not found
     pytest
-    mypy==0.971
+    mypy==0.991
 commands =
-    mypy --show-error-codes {posargs: w3lib tests}
+    mypy --strict --show-error-codes {posargs: w3lib tests}

 [testenv:flake8]
 basepython = python3

w3lib/encoding.py

Lines changed: 1 addition & 1 deletion

@@ -136,7 +136,7 @@ def _c18n_encoding(encoding: str) -> str:
     encoding aliases
     """
     normed = encodings.normalize_encoding(encoding).lower()
-    return encodings.aliases.aliases.get(normed, normed)
+    return cast(str, encodings.aliases.aliases.get(normed, normed))


 def resolve_encoding(encoding_alias: str) -> Optional[str]:

w3lib/html.py

Lines changed: 5 additions & 3 deletions

@@ -66,7 +66,7 @@ def replace_entities(

     """

-    def convert_entity(m: Match) -> str:
+    def convert_entity(m: Match[str]) -> str:
         groups = m.groupdict()
         number = None
         if groups.get("dec"):
@@ -205,7 +205,7 @@ def will_remove(tag: str) -> bool:
         else:
             return tag not in keep

-    def remove_tag(m: Match) -> str:
+    def remove_tag(m: Match[str]) -> str:
         tag = m.group(1)
         return "" if will_remove(tag) else m.group(0)

@@ -278,7 +278,9 @@ def unquote_markup(

     """

-    def _get_fragments(txt: str, pattern: Pattern) -> Iterable[Union[str, Match]]:
+    def _get_fragments(
+        txt: str, pattern: Pattern[str]
+    ) -> Iterable[Union[str, Match[str]]]:
         offset = 0
         for match in pattern.finditer(txt):
             match_s, match_e = match.span(1)

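Under --strict, bare generics such as Match and Pattern are rejected by --disallow-any-generics, hence the Match[str] and Pattern[str] parameters above. A minimal, self-contained example of the annotated-callback style (illustrative only, not code from w3lib):

import re
from typing import Match

def upper_tag(m: Match[str]) -> str:
    # The callback receives a Match[str], so group() results are str-based
    # rather than bytes.
    return m.group(1).upper()

print(re.sub(r"<(\w+)>", upper_tag, "<em>text<br>"))  # EMtextBR
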
w3lib/http.py

Lines changed: 1 addition & 1 deletion

@@ -2,7 +2,7 @@
 from typing import Any, List, MutableMapping, Optional, AnyStr, Sequence, Union, Mapping
 from w3lib.util import to_bytes, to_unicode

-HeadersDictInput = Mapping[bytes, Union[Any, Sequence]]
+HeadersDictInput = Mapping[bytes, Union[Any, Sequence[bytes]]]
 HeadersDictOutput = MutableMapping[bytes, List[bytes]]


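HeadersDictInput now parameterizes the Sequence in its value type, which strict mode requires (a bare Sequence counts as a missing type parameter). A toy sketch of how the alias is meant to be used, with hypothetical header values:

from typing import Any, List, Mapping, MutableMapping, Sequence, Union

HeadersDictInput = Mapping[bytes, Union[Any, Sequence[bytes]]]
HeadersDictOutput = MutableMapping[bytes, List[bytes]]

headers: HeadersDictInput = {
    b"Content-Type": b"text/html",                    # single value
    b"Accept": [b"text/html", b"application/json"],   # sequence of values
}
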
w3lib/url.py

Lines changed: 3 additions & 3 deletions

@@ -302,7 +302,7 @@ def url_query_cleaner(
     url = "?".join([base, sep.join(querylist)]) if querylist else base
     if keep_fragments and fragment:
         url += "#" + fragment
-    return cast(str, url)
+    return url


 def _add_or_replace_parameters(url: str, params: Dict[str, str]) -> str:
@@ -663,7 +663,7 @@ def parse_qsl_to_bytes(
     # (at https://hg.python.org/cpython/rev/c38ac7ab8d9a)
     # except for the unquote(s, encoding, errors) calls replaced
     # with unquote_to_bytes(s)
-    coerce_args = cast(Callable[..., Tuple[str, Callable]], _coerce_args)
+    coerce_args = cast(Callable[..., Tuple[str, Callable[..., bytes]]], _coerce_args)
     qs, _coerce_result = coerce_args(qs)
     pairs = [s2 for s1 in qs.split("&") for s2 in s1.split(";")]
     r = []
@@ -684,5 +684,5 @@ def parse_qsl_to_bytes(
         value: StrOrBytes = nv[1].replace("+", " ")
         value = unquote_to_bytes(value)
         value = _coerce_result(value)
-        r.append((cast(bytes, name), cast(bytes, value)))
+        r.append((name, value))
     return r
