Skip to content

Commit d4deaa3

Browse files
authored
Merge pull request #38 from SethMichaelLarson/percent-encode-percent
Percent-encode % characters
2 parents f45783e + 2627122 commit d4deaa3

File tree

9 files changed

+75
-16
lines changed

9 files changed

+75
-16
lines changed

.travis.yml

Lines changed: 7 additions & 3 deletions
Original file line number | Diff line number | Diff line change
@@ -9,15 +9,19 @@ matrix:
99
include:
1010
- python: 2.7
1111
env: TOXENV=py27
12-
- python: 3.3
13-
env: TOXENV=py33
1412
- python: 3.4
1513
env: TOXENV=py34
1614
- python: 3.5
1715
env: TOXENV=py35
16+
- python: 3.6
17+
env: TOXENV=py36
18+
- python: 3.7
19+
env: TOXENV=py37
20+
dist: xenial
21+
sudo: true
1822
- python: pypy
1923
env: TOXENV=pypy
20-
- python: 3.5
24+
- python: 3.6
2125
env: TOXENV=flake8
2226
#- env: TOXENV=docs
2327

setup.py

Lines changed: 2 additions & 2 deletions
Original file line number | Diff line number | Diff line change
@@ -25,7 +25,7 @@
2525
author_email='[email protected]',
2626
url='http://rfc3986.readthedocs.io',
2727
packages=packages,
28-
package_dir={'': 'src/'},
28+
package_dir={'': 'src'},
2929
package_data={'': ['LICENSE']},
3030
include_package_data=True,
3131
license='Apache 2.0',
@@ -37,9 +37,9 @@
3737
'Programming Language :: Python',
3838
'Programming Language :: Python :: 2.7',
3939
'Programming Language :: Python :: 3',
40-
'Programming Language :: Python :: 3.3',
4140
'Programming Language :: Python :: 3.4',
4241
'Programming Language :: Python :: 3.5',
4342
'Programming Language :: Python :: 3.6',
43+
'Programming Language :: Python :: 3.7',
4444
),
4545
)

src/rfc3986/abnf_regexp.py

Lines changed: 7 additions & 7 deletions
Original file line number | Diff line number | Diff line change
@@ -20,16 +20,16 @@
2020
SUB_DELIMS = SUB_DELIMITERS = "!$&'()*+,;="
2121
SUB_DELIMITERS_SET = set(SUB_DELIMITERS)
2222
# Escape the '*' for use in regular expressions
23-
SUB_DELIMITERS_RE = "!$&'()\*+,;="
23+
SUB_DELIMITERS_RE = r"!$&'()\*+,;="
2424
RESERVED_CHARS_SET = GENERIC_DELIMITERS_SET.union(SUB_DELIMITERS_SET)
2525
ALPHA = 'ABCDEFGHIJKLMNOPQRSTUVWXYZabcdefghijklmnopqrstuvwxyz'
2626
DIGIT = '0123456789'
2727
# https://tools.ietf.org/html/rfc3986#section-2.3
2828
UNRESERVED = UNRESERVED_CHARS = ALPHA + DIGIT + '._!-'
2929
UNRESERVED_CHARS_SET = set(UNRESERVED_CHARS)
30-
NON_PCT_ENCODED_SET = RESERVED_CHARS_SET.union(UNRESERVED_CHARS_SET).union('%')
30+
NON_PCT_ENCODED_SET = RESERVED_CHARS_SET.union(UNRESERVED_CHARS_SET)
3131
# We need to escape the '-' in this case:
32-
UNRESERVED_RE = 'A-Za-z0-9._~\-'
32+
UNRESERVED_RE = r'A-Za-z0-9._~\-'
3333

3434
# Percent encoded character values
3535
PERCENT_ENCODED = PCT_ENCODED = '%[A-Fa-f0-9]{2}'
@@ -59,9 +59,9 @@
5959
# modified to ignore other matches that are not important to the parsing of
6060
# the reference so we can also simply use SRE_Match#groups.
6161
URL_PARSING_RE = (
62-
'(?:(?P<scheme>{scheme}):)?(?://(?P<authority>{authority}))?'
63-
'(?P<path>{path})(?:\?(?P<query>{query}))?'
64-
'(?:#(?P<fragment>{fragment}))?'
62+
r'(?:(?P<scheme>{scheme}):)?(?://(?P<authority>{authority}))?'
63+
r'(?P<path>{path})(?:\?(?P<query>{query}))?'
64+
r'(?:#(?P<fragment>{fragment}))?'
6565
).format(**COMPONENT_PATTERN_DICT)
6666

6767

@@ -120,7 +120,7 @@
120120
ZONE_ID = '(?:[' + UNRESERVED_RE + ']|' + PCT_ENCODED + ')+'
121121
IPv6_ADDRZ_RE = IPv6_RE + '%25' + ZONE_ID
122122

123-
IP_LITERAL_RE = '\[({0}|(?:{1})|{2})\]'.format(
123+
IP_LITERAL_RE = r'\[({0}|(?:{1})|{2})\]'.format(
124124
IPv6_RE,
125125
IPv6_ADDRZ_RE,
126126
IPv_FUTURE_RE,

src/rfc3986/misc.py

Lines changed: 2 additions & 2 deletions
Original file line number | Diff line number | Diff line change
@@ -75,13 +75,13 @@
7575
# Scheme validation, see: http://tools.ietf.org/html/rfc3986#section-3.1
7676
SCHEME_MATCHER = re.compile('^{0}$'.format(abnf_regexp.SCHEME_RE))
7777

78-
RELATIVE_REF_MATCHER = re.compile('^%s(\?%s)?(#%s)?$' % (
78+
RELATIVE_REF_MATCHER = re.compile(r'^%s(\?%s)?(#%s)?$' % (
7979
abnf_regexp.RELATIVE_PART_RE, abnf_regexp.QUERY_RE,
8080
abnf_regexp.FRAGMENT_RE,
8181
))
8282

8383
# See http://tools.ietf.org/html/rfc3986#section-4.3
84-
ABSOLUTE_URI_MATCHER = re.compile('^%s:%s(\?%s)?$' % (
84+
ABSOLUTE_URI_MATCHER = re.compile(r'^%s:%s(\?%s)?$' % (
8585
abnf_regexp.COMPONENT_PATTERN_DICT['scheme'],
8686
abnf_regexp.HIER_PART_RE,
8787
abnf_regexp.QUERY_RE[1:-1],

src/rfc3986/normalizers.py

Lines changed: 8 additions & 1 deletion
Original file line number | Diff line number | Diff line change
@@ -129,15 +129,22 @@ def encode_component(uri_component, encoding):
129129
if uri_component is None:
130130
return uri_component
131131

132+
# Try to see if the component we're encoding is already percent-encoded
133+
# so we can skip all '%' characters but still encode all others.
134+
percent_encodings = len(PERCENT_MATCHER.findall(
135+
compat.to_str(uri_component, encoding)))
136+
132137
uri_bytes = compat.to_bytes(uri_component, encoding)
138+
is_percent_encoded = percent_encodings == uri_bytes.count(b'%')
133139

134140
encoded_uri = bytearray()
135141

136142
for i in range(0, len(uri_bytes)):
137143
# Will return a single character bytestring on both Python 2 & 3
138144
byte = uri_bytes[i:i+1]
139145
byte_ord = ord(byte)
140-
if byte_ord < 128 and byte.decode() in misc.NON_PCT_ENCODED:
146+
if ((is_percent_encoded and byte == b'%')
147+
or (byte_ord < 128 and byte.decode() in misc.NON_PCT_ENCODED)):
141148
encoded_uri.extend(byte)
142149
continue
143150
encoded_uri.extend('%{0:02x}'.format(byte_ord).encode())

tests/base.py

Lines changed: 14 additions & 0 deletions
Original file line number | Diff line number | Diff line change
@@ -119,6 +119,20 @@ def test_handles_relative_uri(self, relative_uri):
119119
assert uri.scheme is None
120120
assert uri.authority == relative_uri[2:]
121121

122+
def test_handles_percent_in_path(self, uri_path_with_percent):
123+
"""Test that self.test_class encodes the % character properly."""
124+
uri = self.test_class.from_string(uri_path_with_percent)
125+
print(uri.path)
126+
assert uri.path == '/%25%20'
127+
128+
def test_handles_percent_in_query(self, uri_query_with_percent):
129+
uri = self.test_class.from_string(uri_query_with_percent)
130+
assert uri.query == 'a=%25'
131+
132+
def test_handles_percent_in_fragment(self, uri_fragment_with_percent):
133+
uri = self.test_class.from_string(uri_fragment_with_percent)
134+
assert uri.fragment == 'perc%25ent'
135+
122136

123137
class BaseTestUnsplits:
124138
test_class = None

tests/conftest.py

Lines changed: 16 additions & 0 deletions
Original file line number | Diff line number | Diff line change
@@ -116,4 +116,20 @@ def absolute_path_uri():
116116
def invalid_uri(request):
117117
return 'https://%s' % request.param
118118

119+
120+
@pytest.fixture(params=valid_hosts)
121+
def uri_path_with_percent(request):
122+
return 'https://%s/%% ' % request.param
123+
124+
125+
@pytest.fixture(params=valid_hosts)
126+
def uri_query_with_percent(request):
127+
return 'https://%s?a=%%' % request.param
128+
129+
130+
@pytest.fixture(params=valid_hosts)
131+
def uri_fragment_with_percent(request):
132+
return 'https://%s#perc%%ent' % request.param
133+
134+
119135
sys.path.insert(0, '.')

tests/test_builder.py

Lines changed: 1 addition & 0 deletions
Original file line number | Diff line number | Diff line change
@@ -135,6 +135,7 @@ def test_add_path(path):
135135
([('a', 'b+c')], 'a=b%2Bc'),
136136
([('a', 'b'), ('c', 'd')], 'a=b&c=d'),
137137
([('a', 'b'), ('username', '@d')], 'a=b&username=%40d'),
138+
([('percent', '%')], 'percent=%25'),
138139
])
139140
def test_add_query_from(query_items, expected):
140141
"""Verify the behaviour of add_query_from."""

tests/test_normalizers.py

Lines changed: 18 additions & 1 deletion
Original file line number | Diff line number | Diff line change
@@ -3,7 +3,8 @@
33

44
from rfc3986.uri import URIReference
55
from rfc3986.normalizers import (
6-
normalize_scheme, normalize_percent_characters, remove_dot_segments
6+
normalize_scheme, normalize_percent_characters,
7+
remove_dot_segments, encode_component,
78
)
89

910

@@ -76,3 +77,19 @@ def test_fragment_normalization():
7677
uri = URIReference(
7778
None, 'example.com', None, None, 'fiz%DF').normalize()
7879
assert uri.fragment == 'fiz%DF'
80+
81+
82+
@pytest.mark.parametrize(
83+
["component", "encoded_component"],
84+
[
85+
('/%', '/%25'),
86+
('/%a', '/%25a'),
87+
('/%ag', '/%25ag'),
88+
('/%af', '/%af'),
89+
('/%20/%', '/%2520/%25'),
90+
('/%20%25', '/%20%25'),
91+
('/%21%22%23%ah%12%ff', '/%2521%2522%2523%25ah%2512%25ff'),
92+
]
93+
)
94+
def test_detect_percent_encoded_component(component, encoded_component):
95+
assert encode_component(component, 'utf-8') == encoded_component

0 commit comments

Comments
 (0)