Skip to content

Commit 0fd1590

Browse files
authored
Merge pull request #170 from Gallaecio/deprecate-python2-api
2 parents ef5c110 + 31b8ecd commit 0fd1590

File tree

3 files changed: +91 additions, -30 deletions

tests/test_util.py

Lines changed: 46 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,46 @@
1+
from unittest import TestCase
2+
3+
from pytest import deprecated_call, raises
4+
5+
from w3lib.util import (
6+
str_to_unicode,
7+
to_bytes,
8+
to_native_str,
9+
to_unicode,
10+
unicode_to_str,
11+
)
12+
13+
14+
class StrToUnicodeTestCase(TestCase):
    """Tests for the deprecated ``str_to_unicode`` helper."""

    def test_deprecation(self):
        """Calling ``str_to_unicode`` must trigger a deprecation warning."""
        empty_text = ''
        with deprecated_call():
            str_to_unicode(empty_text)
19+
20+
21+
class ToBytesTestCase(TestCase):
    """Tests for ``to_bytes`` input validation."""

    def test_type_error(self):
        """A value that is neither ``str`` nor ``bytes`` is rejected."""
        not_text = True
        with raises(TypeError):
            to_bytes(not_text)
26+
27+
28+
class ToNativeStrTestCase(TestCase):
    """Tests for the deprecated ``to_native_str`` helper."""

    def test_deprecation(self):
        """Calling ``to_native_str`` must trigger a deprecation warning."""
        empty_text = ''
        with deprecated_call():
            to_native_str(empty_text)
33+
34+
35+
class ToUnicodeTestCase(TestCase):
    """Tests for ``to_unicode`` input validation."""

    def test_type_error(self):
        """A value that is neither ``bytes`` nor ``str`` is rejected."""
        not_text = True
        with raises(TypeError):
            to_unicode(not_text)
40+
41+
42+
class UnicodeToStrTestCase(TestCase):
    """Tests for the deprecated ``unicode_to_str`` helper."""

    def test_deprecation(self):
        """Calling ``unicode_to_str`` must trigger a deprecation warning."""
        empty_text = ''
        with deprecated_call():
            unicode_to_str(empty_text)

w3lib/url.py

Lines changed: 17 additions & 26 deletions
Original file line numberDiff line numberDiff line change
@@ -24,12 +24,12 @@
2424
urlunsplit,
2525
)
2626
from urllib.request import pathname2url, url2pathname
27-
from w3lib.util import to_bytes, to_native_str, to_unicode
27+
from w3lib.util import to_unicode
2828

2929

3030
# error handling function for bytes-to-Unicode decoding errors with URLs
3131
def _quote_byte(error):
32-
return (to_unicode(quote(error.object[error.start:error.end])), error.end)
32+
return (quote(error.object[error.start:error.end]), error.end)
3333

3434
codecs.register_error('percentencode', _quote_byte)
3535

@@ -77,26 +77,22 @@ def safe_url_string(url, encoding='utf8', path_encoding='utf8', quote_path=True)
7777
# IDNA encoding can fail for too long labels (>63 characters)
7878
# or missing labels (e.g. http://.example.com)
7979
try:
80-
netloc = parts.netloc.encode('idna')
80+
netloc = parts.netloc.encode('idna').decode()
8181
except UnicodeError:
8282
netloc = parts.netloc
8383

8484
# default encoding for path component SHOULD be UTF-8
8585
if quote_path:
86-
path = quote(to_bytes(parts.path, path_encoding), _path_safe_chars)
86+
path = quote(parts.path.encode(path_encoding), _path_safe_chars)
8787
else:
88-
path = to_native_str(parts.path)
88+
path = parts.path
8989

90-
# quote() in Python2 return type follows input type;
91-
# quote() in Python3 always returns Unicode (native str)
9290
return urlunsplit((
93-
to_native_str(parts.scheme),
94-
to_native_str(netloc).rstrip(':'),
91+
parts.scheme,
92+
netloc.rstrip(':'),
9593
path,
96-
# encoding of query and fragment follows page encoding
97-
# or form-charset (if known and passed)
98-
quote(to_bytes(parts.query, encoding), _safe_chars),
99-
quote(to_bytes(parts.fragment, encoding), _safe_chars),
94+
quote(parts.query.encode(encoding), _safe_chars),
95+
quote(parts.fragment.encode(encoding), _safe_chars),
10096
))
10197

10298

@@ -410,22 +406,17 @@ def _safe_ParseResult(parts, encoding='utf8', path_encoding='utf8'):
410406
# IDNA encoding can fail for too long labels (>63 characters)
411407
# or missing labels (e.g. http://.example.com)
412408
try:
413-
netloc = parts.netloc.encode('idna')
409+
netloc = parts.netloc.encode('idna').decode()
414410
except UnicodeError:
415411
netloc = parts.netloc
416412

417413
return (
418-
to_native_str(parts.scheme),
419-
to_native_str(netloc),
420-
421-
# default encoding for path component SHOULD be UTF-8
422-
quote(to_bytes(parts.path, path_encoding), _path_safe_chars),
423-
quote(to_bytes(parts.params, path_encoding), _safe_chars),
424-
425-
# encoding of query and fragment follows page encoding
426-
# or form-charset (if known and passed)
427-
quote(to_bytes(parts.query, encoding), _safe_chars),
428-
quote(to_bytes(parts.fragment, encoding), _safe_chars)
414+
parts.scheme,
415+
netloc,
416+
quote(parts.path.encode(path_encoding), _path_safe_chars),
417+
quote(parts.params.encode(path_encoding), _safe_chars),
418+
quote(parts.query.encode(encoding), _safe_chars),
419+
quote(parts.fragment.encode(encoding), _safe_chars)
429420
)
430421

431422

@@ -466,7 +457,7 @@ def canonicalize_url(url, keep_blank_values=True, keep_fragments=False,
466457
# if not for proper URL expected by remote website.
467458
try:
468459
scheme, netloc, path, params, query, fragment = _safe_ParseResult(
469-
parse_url(url), encoding=encoding)
460+
parse_url(url), encoding=encoding or 'utf8')
470461
except UnicodeEncodeError as e:
471462
scheme, netloc, path, params, query, fragment = _safe_ParseResult(
472463
parse_url(url), encoding='utf8')

w3lib/util.py

Lines changed: 28 additions & 4 deletions
Original file line numberDiff line numberDiff line change
@@ -1,11 +1,26 @@
1+
from warnings import warn
2+
3+
14
def str_to_unicode(text, encoding=None, errors='strict'):
    """Decode `text` to ``str`` when it is ``bytes``; return it unchanged otherwise.

    Deprecated: every call emits a ``DeprecationWarning`` attributed to the
    caller (``stacklevel=2``).

    :param text: value to convert; only ``bytes`` instances are decoded, any
        other object is returned as-is.
    :param encoding: codec used for decoding; ``None`` means ``'utf-8'``.
    :param errors: error handler passed to ``bytes.decode``.
    :return: the decoded ``str``, or `text` itself when it is not ``bytes``.
    :raises UnicodeDecodeError: if `text` cannot be decoded with `encoding`
        under the ``'strict'`` error handler.
    """
    # The module is ``w3lib.util`` (singular); the message must name the
    # real import path so users can locate the call they need to replace.
    warn(
        "The w3lib.util.str_to_unicode function is deprecated and "
        "will be removed in a future release.",
        DeprecationWarning,
        stacklevel=2,
    )
    if encoding is None:
        encoding = 'utf-8'
    if isinstance(text, bytes):
        return text.decode(encoding, errors)
    return text
716

817
def unicode_to_str(text, encoding=None, errors='strict'):
18+
warn(
19+
"The w3lib.utils.unicode_to_str function is deprecated and "
20+
"will be removed in a future release.",
21+
DeprecationWarning,
22+
stacklevel=2,
23+
)
924
if encoding is None:
1025
encoding = 'utf-8'
1126
if isinstance(text, str):
@@ -18,8 +33,9 @@ def to_unicode(text, encoding=None, errors='strict'):
1833
if isinstance(text, str):
1934
return text
2035
if not isinstance(text, (bytes, str)):
21-
raise TypeError('to_unicode must receive a bytes, str or unicode '
22-
'object, got %s' % type(text).__name__)
36+
raise TypeError(
37+
f'to_unicode must receive bytes or str, got {type(text).__name__}'
38+
)
2339
if encoding is None:
2440
encoding = 'utf-8'
2541
return text.decode(encoding, errors)
@@ -30,12 +46,20 @@ def to_bytes(text, encoding=None, errors='strict'):
3046
if isinstance(text, bytes):
3147
return text
3248
if not isinstance(text, str):
33-
raise TypeError('to_bytes must receive a unicode, str or bytes '
34-
'object, got %s' % type(text).__name__)
49+
raise TypeError(
50+
f'to_bytes must receive str or bytes, got {type(text).__name__}'
51+
)
3552
if encoding is None:
3653
encoding = 'utf-8'
3754
return text.encode(encoding, errors)
3855

3956
def to_native_str(text, encoding=None, errors='strict'):
    """Return the ``str`` representation of `text`.

    Deprecated alias of ``to_unicode``: every call emits a
    ``DeprecationWarning`` attributed to the caller (``stacklevel=2``) and
    then delegates to ``to_unicode`` with the same arguments.

    :param text: ``bytes`` or ``str`` value to convert.
    :param encoding: codec forwarded to ``to_unicode``.
    :param errors: decoding error handler forwarded to ``to_unicode``.
    :return: whatever ``to_unicode(text, encoding, errors)`` returns.
    """
    # The module is ``w3lib.util`` (singular); both the deprecated name and
    # the suggested replacement must use the real import path.
    warn(
        "The w3lib.util.to_native_str function is deprecated and "
        "will be removed in a future release. Please use "
        "w3lib.util.to_unicode instead.",
        DeprecationWarning,
        stacklevel=2,
    )
    return to_unicode(text, encoding, errors)

0 commit comments

Comments
 (0)