Skip to content
Merged
Show file tree
Hide file tree
Changes from 3 commits
Commits
File filter

Filter by extension

Filter by extension


Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
6 changes: 3 additions & 3 deletions .github/workflows/build.yml
Original file line number Diff line number Diff line change
Expand Up @@ -18,13 +18,13 @@ jobs:
- python-version: 3.7
env:
TOXENV: docs
- python-version: 3.8
- python-version: 3.9
env:
TOXENV: flake8
- python-version: 3.8
- python-version: 3.9
env:
TOXENV: pylint
- python-version: 3.8
- python-version: 3.9
env:
TOXENV: security

Expand Down
4 changes: 2 additions & 2 deletions .github/workflows/publish.yml
Original file line number Diff line number Diff line change
Expand Up @@ -12,10 +12,10 @@ jobs:
steps:
- uses: actions/checkout@v2

- name: Set up Python 3.8
- name: Set up Python 3.9
uses: actions/setup-python@v2
with:
python-version: 3.8
python-version: 3.9

- name: Check Tag
id: check-release-tag
Expand Down
2 changes: 1 addition & 1 deletion .github/workflows/tests.yml
Original file line number Diff line number Diff line change
Expand Up @@ -14,7 +14,7 @@ jobs:
runs-on: ubuntu-20.04
strategy:
matrix:
python-version: [2.7, 3.5, 3.6, 3.7, 3.8, pypy3]
python-version: [3.6, 3.7, 3.8, 3.9, pypy3]

steps:
- uses: actions/checkout@v2
Expand Down
2 changes: 1 addition & 1 deletion README.rst
Original file line number Diff line number Diff line change
Expand Up @@ -27,7 +27,7 @@ This is a Python library of web-related functions, such as:
Requirements
============

Python 2.7 or Python 3.5+
Python 3.6+

Install
=======
Expand Down
2 changes: 1 addition & 1 deletion docs/index.rst
Original file line number Diff line number Diff line change
Expand Up @@ -28,7 +28,7 @@ Modules
Requirements
============

Python 2.7 or Python 3.3+
Python 3.6+

Install
=======
Expand Down
2 changes: 2 additions & 0 deletions pytest.ini
Original file line number Diff line number Diff line change
@@ -1,6 +1,8 @@
[pytest]
doctest_optionflags = ALLOW_UNICODE ALLOW_BYTES
flake8-ignore =
W503

docs/conf.py E121 E122 E265 E401 E501
tests/test_encoding.py E128 E221 E241 E302 E401 E501 E731
tests/test_form.py E265 E501
Expand Down
5 changes: 1 addition & 4 deletions setup.py
Original file line number Diff line number Diff line change
Expand Up @@ -18,16 +18,13 @@
'License :: OSI Approved :: BSD License',
'Operating System :: OS Independent',
'Programming Language :: Python',
'Programming Language :: Python :: 2',
'Programming Language :: Python :: 2.7',
'Programming Language :: Python :: 3',
'Programming Language :: Python :: 3.5',
'Programming Language :: Python :: 3.6',
'Programming Language :: Python :: 3.7',
'Programming Language :: Python :: 3.8',
'Programming Language :: Python :: 3.9',
'Programming Language :: Python :: Implementation :: CPython',
'Programming Language :: Python :: Implementation :: PyPy',
'Topic :: Internet :: WWW/HTTP',
],
install_requires=['six >= 1.4.1'],
)
2 changes: 0 additions & 2 deletions stdeb.cfg

This file was deleted.

27 changes: 17 additions & 10 deletions tests/test_encoding.py
Original file line number Diff line number Diff line change
@@ -1,7 +1,14 @@
import unittest, codecs
import six
from w3lib.encoding import (html_body_declared_encoding, read_bom, to_unicode,
http_content_type_encoding, resolve_encoding, html_to_unicode)
import codecs
import unittest

from w3lib.encoding import (
html_body_declared_encoding,
http_content_type_encoding,
html_to_unicode,
read_bom,
resolve_encoding,
to_unicode,
)

class RequestEncodingTests(unittest.TestCase):
utf8_fragments = [
Expand Down Expand Up @@ -107,18 +114,18 @@ def test_unicode_body(self):
original_string = unicode_string.encode('cp1251')
encoding, body_unicode = html_to_unicode(ct('cp1251'), original_string)
# check body_as_unicode
self.assertTrue(isinstance(body_unicode, six.text_type))
self.assertTrue(isinstance(body_unicode, str))
self.assertEqual(body_unicode, unicode_string)

def _assert_encoding(self, content_type, body, expected_encoding,
expected_unicode):
assert not isinstance(body, six.text_type)
assert not isinstance(body, str)
encoding, body_unicode = html_to_unicode(ct(content_type), body)
self.assertTrue(isinstance(body_unicode, six.text_type))
self.assertTrue(isinstance(body_unicode, str))
self.assertEqual(norm_encoding(encoding),
norm_encoding(expected_encoding))

if isinstance(expected_unicode, six.string_types):
if isinstance(expected_unicode, str):
self.assertEqual(body_unicode, expected_unicode)
else:
self.assertTrue(
Expand Down Expand Up @@ -177,9 +184,9 @@ def test_replace_wrong_encoding(self):

def _assert_encoding_detected(self, content_type, expected_encoding, body,
**kwargs):
assert not isinstance(body, six.text_type)
assert not isinstance(body, str)
encoding, body_unicode = html_to_unicode(ct(content_type), body, **kwargs)
self.assertTrue(isinstance(body_unicode, six.text_type))
self.assertTrue(isinstance(body_unicode, str))
self.assertEqual(norm_encoding(encoding), norm_encoding(expected_encoding))

def test_BOM(self):
Expand Down
91 changes: 49 additions & 42 deletions tests/test_html.py
Original file line number Diff line number Diff line change
@@ -1,18 +1,25 @@
# -*- coding: utf-8 -*-
import unittest
import six
from w3lib.html import (replace_entities, replace_tags, remove_comments,
remove_tags_with_content, replace_escape_chars, remove_tags, unquote_markup,
get_base_url, get_meta_refresh)

from w3lib.html import (
get_base_url,
get_meta_refresh,
remove_comments,
remove_tags,
remove_tags_with_content,
replace_entities,
replace_escape_chars,
replace_tags,
unquote_markup,
)


class RemoveEntitiesTest(unittest.TestCase):
def test_returns_unicode(self):
# make sure it always returns unicode
assert isinstance(replace_entities(b'no entities'), six.text_type)
assert isinstance(replace_entities(b'Price: £100!'), six.text_type)
assert isinstance(replace_entities(u'no entities'), six.text_type)
assert isinstance(replace_entities(u'Price: £100!'), six.text_type)
assert isinstance(replace_entities(b'no entities'), str)
assert isinstance(replace_entities(b'Price: £100!'), str)
assert isinstance(replace_entities(u'no entities'), str)
assert isinstance(replace_entities(u'Price: £100!'), str)

def test_regular(self):
# regular conversions
Expand Down Expand Up @@ -71,8 +78,8 @@ def test_encoding(self):
class ReplaceTagsTest(unittest.TestCase):
def test_returns_unicode(self):
# make sure it always returns unicode
assert isinstance(replace_tags(b'no entities'), six.text_type)
assert isinstance(replace_tags('no entities'), six.text_type)
assert isinstance(replace_tags(b'no entities'), str)
assert isinstance(replace_tags('no entities'), str)

def test_replace_tags(self):
self.assertEqual(replace_tags(u'This text contains <a>some tag</a>'),
Expand All @@ -88,10 +95,10 @@ def test_replace_tags_multiline(self):
class RemoveCommentsTest(unittest.TestCase):
def test_returns_unicode(self):
# make sure it always returns unicode
assert isinstance(remove_comments(b'without comments'), six.text_type)
assert isinstance(remove_comments(b'<!-- with comments -->'), six.text_type)
assert isinstance(remove_comments(u'without comments'), six.text_type)
assert isinstance(remove_comments(u'<!-- with comments -->'), six.text_type)
assert isinstance(remove_comments(b'without comments'), str)
assert isinstance(remove_comments(b'<!-- with comments -->'), str)
assert isinstance(remove_comments(u'without comments'), str)
assert isinstance(remove_comments(u'<!-- with comments -->'), str)

def test_no_comments(self):
# text without comments
Expand All @@ -112,16 +119,16 @@ def test_remove_comments(self):
class RemoveTagsTest(unittest.TestCase):
def test_returns_unicode(self):
# make sure it always returns unicode
assert isinstance(remove_tags(b'no tags'), six.text_type)
assert isinstance(remove_tags(b'no tags', which_ones=('p',)), six.text_type)
assert isinstance(remove_tags(b'<p>one tag</p>'), six.text_type)
assert isinstance(remove_tags(b'<p>one tag</p>', which_ones=('p')), six.text_type)
assert isinstance(remove_tags(b'<a>link</a>', which_ones=('b',)), six.text_type)
assert isinstance(remove_tags(u'no tags'), six.text_type)
assert isinstance(remove_tags(u'no tags', which_ones=('p',)), six.text_type)
assert isinstance(remove_tags(u'<p>one tag</p>'), six.text_type)
assert isinstance(remove_tags(u'<p>one tag</p>', which_ones=('p')), six.text_type)
assert isinstance(remove_tags(u'<a>link</a>', which_ones=('b',)), six.text_type)
assert isinstance(remove_tags(b'no tags'), str)
assert isinstance(remove_tags(b'no tags', which_ones=('p',)), str)
assert isinstance(remove_tags(b'<p>one tag</p>'), str)
assert isinstance(remove_tags(b'<p>one tag</p>', which_ones=('p')), str)
assert isinstance(remove_tags(b'<a>link</a>', which_ones=('b',)), str)
assert isinstance(remove_tags(u'no tags'), str)
assert isinstance(remove_tags(u'no tags', which_ones=('p',)), str)
assert isinstance(remove_tags(u'<p>one tag</p>'), str)
assert isinstance(remove_tags(u'<p>one tag</p>', which_ones=('p')), str)
assert isinstance(remove_tags(u'<a>link</a>', which_ones=('b',)), str)

def test_remove_tags_without_tags(self):
# text without tags
Expand Down Expand Up @@ -160,14 +167,14 @@ def test_uppercase_tags(self):
class RemoveTagsWithContentTest(unittest.TestCase):
def test_returns_unicode(self):
# make sure it always returns unicode
assert isinstance(remove_tags_with_content(b'no tags'), six.text_type)
assert isinstance(remove_tags_with_content(b'no tags', which_ones=('p',)), six.text_type)
assert isinstance(remove_tags_with_content(b'<p>one tag</p>', which_ones=('p',)), six.text_type)
assert isinstance(remove_tags_with_content(b'<a>link</a>', which_ones=('b',)), six.text_type)
assert isinstance(remove_tags_with_content(u'no tags'), six.text_type)
assert isinstance(remove_tags_with_content(u'no tags', which_ones=('p',)), six.text_type)
assert isinstance(remove_tags_with_content(u'<p>one tag</p>', which_ones=('p',)), six.text_type)
assert isinstance(remove_tags_with_content(u'<a>link</a>', which_ones=('b',)), six.text_type)
assert isinstance(remove_tags_with_content(b'no tags'), str)
assert isinstance(remove_tags_with_content(b'no tags', which_ones=('p',)), str)
assert isinstance(remove_tags_with_content(b'<p>one tag</p>', which_ones=('p',)), str)
assert isinstance(remove_tags_with_content(b'<a>link</a>', which_ones=('b',)), str)
assert isinstance(remove_tags_with_content(u'no tags'), str)
assert isinstance(remove_tags_with_content(u'no tags', which_ones=('p',)), str)
assert isinstance(remove_tags_with_content(u'<p>one tag</p>', which_ones=('p',)), str)
assert isinstance(remove_tags_with_content(u'<a>link</a>', which_ones=('b',)), str)

def test_without_tags(self):
# text without tags
Expand All @@ -194,13 +201,13 @@ def test_tags_with_shared_prefix(self):
class ReplaceEscapeCharsTest(unittest.TestCase):
def test_returns_unicode(self):
# make sure it always returns unicode
assert isinstance(replace_escape_chars(b'no ec'), six.text_type)
assert isinstance(replace_escape_chars(b'no ec', replace_by='str'), six.text_type)
assert isinstance(replace_escape_chars(b'no ec', replace_by=u'str'), six.text_type)
assert isinstance(replace_escape_chars(b'no ec', which_ones=('\n', '\t',)), six.text_type)
assert isinstance(replace_escape_chars(u'no ec'), six.text_type)
assert isinstance(replace_escape_chars(u'no ec', replace_by=u'str'), six.text_type)
assert isinstance(replace_escape_chars(u'no ec', which_ones=('\n', '\t',)), six.text_type)
assert isinstance(replace_escape_chars(b'no ec'), str)
assert isinstance(replace_escape_chars(b'no ec', replace_by='str'), str)
assert isinstance(replace_escape_chars(b'no ec', replace_by=u'str'), str)
assert isinstance(replace_escape_chars(b'no ec', which_ones=('\n', '\t',)), str)
assert isinstance(replace_escape_chars(u'no ec'), str)
assert isinstance(replace_escape_chars(u'no ec', replace_by=u'str'), str)
assert isinstance(replace_escape_chars(u'no ec', which_ones=('\n', '\t',)), str)

def test_without_escape_chars(self):
# text without escape chars
Expand All @@ -226,8 +233,8 @@ class UnquoteMarkupTest(unittest.TestCase):

def test_returns_unicode(self):
# make sure it always returns unicode
assert isinstance(unquote_markup(self.sample_txt1.encode('latin-1')), six.text_type)
assert isinstance(unquote_markup(self.sample_txt2), six.text_type)
assert isinstance(unquote_markup(self.sample_txt1.encode('latin-1')), str)
assert isinstance(unquote_markup(self.sample_txt2), str)

def test_unquote_markup(self):
self.assertEqual(unquote_markup(self.sample_txt1), u"""<node1>hi, this is sample text with entities: & \xa9
Expand Down
24 changes: 17 additions & 7 deletions tests/test_url.py
Original file line number Diff line number Diff line change
@@ -1,15 +1,25 @@
# -*- coding: utf-8 -*-
from __future__ import absolute_import
import os
import unittest
from urllib.parse import urlparse

import pytest
from six.moves.urllib.parse import urlparse

from w3lib.url import (is_url, safe_url_string, safe_download_url,
url_query_parameter, add_or_replace_parameter, url_query_cleaner,
file_uri_to_path, parse_data_uri, path_to_file_uri, any_to_uri,
urljoin_rfc, canonicalize_url, parse_url, add_or_replace_parameters)
from w3lib.url import (
add_or_replace_parameter,
add_or_replace_parameters,
any_to_uri,
canonicalize_url,
file_uri_to_path,
is_url,
parse_data_uri,
parse_url,
path_to_file_uri,
safe_download_url,
safe_url_string,
url_query_parameter,
url_query_cleaner,
urljoin_rfc,
)


class UrlTests(unittest.TestCase):
Expand Down
7 changes: 2 additions & 5 deletions w3lib/form.py
Original file line number Diff line number Diff line change
@@ -1,9 +1,6 @@
import warnings
import six
if six.PY2:
from cStringIO import StringIO as BytesIO
else:
from io import BytesIO
from io import BytesIO

from w3lib.util import unicode_to_str


Expand Down
Loading