Commit c1893e9

Merge pull request #206 from scrapy/improve-typing

Full typing

2 parents: be369f1 + a709f38
File tree: 7 files changed, +60 -34 lines

    tests/test_encoding.py
    tests/test_url.py
    tox.ini
    w3lib/encoding.py
    w3lib/html.py
    w3lib/http.py
    w3lib/url.py
tests/test_encoding.py

Lines changed: 16 additions & 5 deletions

@@ -1,5 +1,6 @@
 import codecs
 import unittest
+from typing import Optional, Union, List, Any

 from w3lib.encoding import (
     html_body_declared_encoding,
@@ -121,11 +122,11 @@ def test_invalid_utf8(self):
         self.assertEqual(to_unicode(b"\xc2\xc2\xa3", "utf-8"), "\ufffd\xa3")


-def ct(charset):
+def ct(charset: Optional[str]) -> Optional[str]:
     return "Content-Type: text/html; charset=" + charset if charset else None


-def norm_encoding(enc):
+def norm_encoding(enc: str) -> str:
     return codecs.lookup(enc).name


@@ -138,7 +139,13 @@ def test_unicode_body(self):
         self.assertTrue(isinstance(body_unicode, str))
         self.assertEqual(body_unicode, unicode_string)

-    def _assert_encoding(self, content_type, body, expected_encoding, expected_unicode):
+    def _assert_encoding(
+        self,
+        content_type: Optional[str],
+        body: bytes,
+        expected_encoding: str,
+        expected_unicode: Union[str, List[str]],
+    ) -> None:
         assert not isinstance(body, str)
         encoding, body_unicode = html_to_unicode(ct(content_type), body)
         self.assertTrue(isinstance(body_unicode, str))
@@ -210,8 +217,12 @@ def test_replace_wrong_encoding(self):
         assert "<span>value</span>" in body_unicode, repr(body_unicode)

     def _assert_encoding_detected(
-        self, content_type, expected_encoding, body, **kwargs
-    ):
+        self,
+        content_type: Optional[str],
+        expected_encoding: str,
+        body: bytes,
+        **kwargs: Any,
+    ) -> None:
         assert not isinstance(body, str)
         encoding, body_unicode = html_to_unicode(ct(content_type), body, **kwargs)
         self.assertTrue(isinstance(body_unicode, str))

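A side note on the annotations above: giving ct an Optional[str] parameter and return type lets mypy track the None case at call sites. A minimal sketch of that effect (hypothetical usage, not part of the diff):

from typing import Optional

def ct(charset: Optional[str]) -> Optional[str]:
    return "Content-Type: text/html; charset=" + charset if charset else None

header = ct(None)       # inferred as Optional[str]
# header.lower()        # mypy: Item "None" of "Optional[str]" has no attribute "lower"
print(ct("utf-8"))      # Content-Type: text/html; charset=utf-8
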
tests/test_url.py

Lines changed: 32 additions & 19 deletions

@@ -1,5 +1,7 @@
 import os
 import unittest
+from inspect import isclass
+from typing import Optional, Union, Type, Callable, Tuple, List
 from urllib.parse import urlparse

 import pytest
@@ -10,6 +12,7 @@
     _ASCII_TAB_OR_NEWLINE,
     _C0_CONTROL_OR_SPACE,
 )
+from w3lib._types import StrOrBytes
 from w3lib._url import _SPECIAL_SCHEMES
 from w3lib.url import (
     add_or_replace_parameter,
@@ -27,17 +30,16 @@
     url_query_cleaner,
 )

-
-UNSET = object()
-
 # Test cases for URL-to-safe-URL conversions with a URL and an encoding as
 # input parameters.
 #
 # (encoding, input URL, output URL or exception)
-SAFE_URL_ENCODING_CASES = (
-    (UNSET, "", ValueError),
-    (UNSET, "https://example.com", "https://example.com"),
-    (UNSET, "https://example.com/©", "https://example.com/%C2%A9"),
+SAFE_URL_ENCODING_CASES: List[
+    Tuple[Optional[str], StrOrBytes, Union[str, Type[Exception]]]
+] = [
+    (None, "", ValueError),
+    (None, "https://example.com", "https://example.com"),
+    (None, "https://example.com/©", "https://example.com/%C2%A9"),
     # Paths are always UTF-8-encoded.
     ("iso-8859-1", "https://example.com/©", "https://example.com/%C2%A9"),
     # Queries are UTF-8-encoded if the scheme is not special, ws or wss.
@@ -53,7 +55,7 @@
     ),
     # Fragments are always UTF-8-encoded.
     ("iso-8859-1", "https://example.com#©", "https://example.com#%C2%A9"),
-)
+]

 INVALID_SCHEME_FOLLOW_UPS = "".join(
     chr(value)
@@ -315,15 +317,17 @@
 )


-def _test_safe_url_func(url, *, encoding=UNSET, output, func):
+def _test_safe_url_func(
+    url: StrOrBytes,
+    *,
+    encoding: Optional[str] = None,
+    output: Union[str, Type[Exception]],
+    func: Callable[..., str],
+) -> None:
     kwargs = {}
-    if encoding is not UNSET:
+    if encoding is not None:
         kwargs["encoding"] = encoding
-    try:
-        is_exception = issubclass(output, Exception)
-    except TypeError:
-        is_exception = False
-    if is_exception:
+    if isclass(output) and issubclass(output, Exception):
         with pytest.raises(output):
             func(url, **kwargs)
         return
@@ -332,7 +336,12 @@ def _test_safe_url_func(url, *, encoding=UNSET, output, func):
     assert func(actual, **kwargs) == output  # Idempotency


-def _test_safe_url_string(url, *, encoding=UNSET, output):
+def _test_safe_url_string(
+    url: StrOrBytes,
+    *,
+    encoding: Optional[str] = None,
+    output: Union[str, Type[Exception]],
+) -> None:
     return _test_safe_url_func(
         url,
         encoding=encoding,
@@ -342,7 +351,7 @@ def _test_safe_url_string(url, *, encoding=UNSET, output):


 KNOWN_SAFE_URL_STRING_ENCODING_ISSUES = {
-    (UNSET, ""),  # Invalid URL
+    (None, ""),  # Invalid URL
     # UTF-8 encoding is not enforced in non-special URLs, or in URLs with the
     # ws or wss schemas.
     ("iso-8859-1", "a://example.com?\xa9"),
@@ -362,7 +371,9 @@ def _test_safe_url_string(url, *, encoding=UNSET, output):
         for case in SAFE_URL_ENCODING_CASES
     ),
 )
-def test_safe_url_string_encoding(encoding, url, output):
+def test_safe_url_string_encoding(
+    encoding: Optional[str], url: StrOrBytes, output: Union[str, Type[Exception]]
+) -> None:
     _test_safe_url_string(url, encoding=encoding, output=output)


@@ -421,7 +432,9 @@ def test_safe_url_string_encoding(encoding, url, output):
         for case in SAFE_URL_URL_CASES
     ),
 )
-def test_safe_url_string_url(url, output):
+def test_safe_url_string_url(
+    url: StrOrBytes, output: Union[str, Type[Exception]]
+) -> None:
     _test_safe_url_string(url, output=output)

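Two details worth noting in _test_safe_url_func: the UNSET sentinel is replaced by None, which is simpler to express as Optional[str], and the try/except around issubclass() gives way to an isclass() guard, which avoids the TypeError for non-class values and is easier for a strict type checker to follow. A rough standalone sketch of the pattern (illustrative only, not code from the repository):

from inspect import isclass
from typing import Type, Union

def expects_exception(output: Union[str, Type[Exception]]) -> bool:
    # issubclass() raises TypeError for non-class arguments, so check
    # isclass() first instead of wrapping the call in try/except.
    return isclass(output) and issubclass(output, Exception)

print(expects_exception(ValueError))             # True
print(expects_exception("https://example.com"))  # False
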
tox.ini

Lines changed: 2 additions & 2 deletions

@@ -27,9 +27,9 @@ basepython = python3
 deps =
     # mypy would error if pytest (or its sub) not found
     pytest
-    mypy==0.971
+    mypy==0.991
 commands =
-    mypy --show-error-codes {posargs: w3lib tests}
+    mypy --strict --show-error-codes {posargs: w3lib tests}

 [testenv:flake8]
 basepython = python3

w3lib/encoding.py

Lines changed: 1 addition & 1 deletion

@@ -136,7 +136,7 @@ def _c18n_encoding(encoding: str) -> str:
     encoding aliases
     """
     normed = encodings.normalize_encoding(encoding).lower()
-    return encodings.aliases.aliases.get(normed, normed)
+    return cast(str, encodings.aliases.aliases.get(normed, normed))


 def resolve_encoding(encoding_alias: str) -> Optional[str]:

w3lib/html.py

Lines changed: 5 additions & 3 deletions

@@ -66,7 +66,7 @@ def replace_entities(

     """

-    def convert_entity(m: Match) -> str:
+    def convert_entity(m: Match[str]) -> str:
         groups = m.groupdict()
         number = None
         if groups.get("dec"):
@@ -205,7 +205,7 @@ def will_remove(tag: str) -> bool:
         else:
             return tag not in keep

-    def remove_tag(m: Match) -> str:
+    def remove_tag(m: Match[str]) -> str:
         tag = m.group(1)
         return "" if will_remove(tag) else m.group(0)

@@ -278,7 +278,9 @@ def unquote_markup(

     """

-    def _get_fragments(txt: str, pattern: Pattern) -> Iterable[Union[str, Match]]:
+    def _get_fragments(
+        txt: str, pattern: Pattern[str]
+    ) -> Iterable[Union[str, Match[str]]]:
         offset = 0
         for match in pattern.finditer(txt):
             match_s, match_e = match.span(1)

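Under --strict, bare generics such as Match and Pattern are rejected by --disallow-any-generics, hence the Match[str] and Pattern[str] parameters above. A minimal, self-contained example of the annotated-callback style (illustrative only, not code from w3lib):

import re
from typing import Match

def upper_tag(m: Match[str]) -> str:
    # The callback receives a Match[str], so group() results are str-based
    # rather than bytes.
    return m.group(1).upper()

print(re.sub(r"<(\w+)>", upper_tag, "<em>text<br>"))  # EMtextBR
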
w3lib/http.py

Lines changed: 1 addition & 1 deletion

@@ -2,7 +2,7 @@
 from typing import Any, List, MutableMapping, Optional, AnyStr, Sequence, Union, Mapping
 from w3lib.util import to_bytes, to_unicode

-HeadersDictInput = Mapping[bytes, Union[Any, Sequence]]
+HeadersDictInput = Mapping[bytes, Union[Any, Sequence[bytes]]]
 HeadersDictOutput = MutableMapping[bytes, List[bytes]]


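HeadersDictInput now parameterizes the Sequence in its value type, which strict mode requires (a bare Sequence counts as a missing type parameter). A toy sketch of how the alias is meant to be used, with hypothetical header values:

from typing import Any, List, Mapping, MutableMapping, Sequence, Union

HeadersDictInput = Mapping[bytes, Union[Any, Sequence[bytes]]]
HeadersDictOutput = MutableMapping[bytes, List[bytes]]

headers: HeadersDictInput = {
    b"Content-Type": b"text/html",                    # single value
    b"Accept": [b"text/html", b"application/json"],   # sequence of values
}
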
w3lib/url.py

Lines changed: 3 additions & 3 deletions

@@ -302,7 +302,7 @@ def url_query_cleaner(
     url = "?".join([base, sep.join(querylist)]) if querylist else base
     if keep_fragments and fragment:
         url += "#" + fragment
-    return cast(str, url)
+    return url


 def _add_or_replace_parameters(url: str, params: Dict[str, str]) -> str:
@@ -663,7 +663,7 @@ def parse_qsl_to_bytes(
     # (at https://hg.python.org/cpython/rev/c38ac7ab8d9a)
     # except for the unquote(s, encoding, errors) calls replaced
     # with unquote_to_bytes(s)
-    coerce_args = cast(Callable[..., Tuple[str, Callable]], _coerce_args)
+    coerce_args = cast(Callable[..., Tuple[str, Callable[..., bytes]]], _coerce_args)
     qs, _coerce_result = coerce_args(qs)
     pairs = [s2 for s1 in qs.split("&") for s2 in s1.split(";")]
     r = []
@@ -684,5 +684,5 @@ def parse_qsl_to_bytes(
         value: StrOrBytes = nv[1].replace("+", " ")
         value = unquote_to_bytes(value)
         value = _coerce_result(value)
-        r.append((cast(bytes, name), cast(bytes, value)))
+        r.append((name, value))
     return r
