Skip to content

Commit bf71fbb

Browse files
committed
Percent-encode % chars
1 parent 83b2a4c commit bf71fbb

File tree

6 files changed

+49
-10
lines changed

6 files changed

+49
-10
lines changed

src/rfc3986/abnf_regexp.py

Lines changed: 7 additions & 7 deletions
Original file line numberDiff line numberDiff line change
@@ -20,16 +20,16 @@
2020
SUB_DELIMS = SUB_DELIMITERS = "!$&'()*+,;="
2121
SUB_DELIMITERS_SET = set(SUB_DELIMITERS)
2222
# Escape the '*' for use in regular expressions
23-
SUB_DELIMITERS_RE = "!$&'()\*+,;="
23+
SUB_DELIMITERS_RE = r"!$&'()\*+,;="
2424
RESERVED_CHARS_SET = GENERIC_DELIMITERS_SET.union(SUB_DELIMITERS_SET)
2525
ALPHA = 'ABCDEFGHIJKLMNOPQRSTUVWXYZabcdefghijklmnopqrstuvwxyz'
2626
DIGIT = '0123456789'
2727
# https://tools.ietf.org/html/rfc3986#section-2.3
2828
UNRESERVED = UNRESERVED_CHARS = ALPHA + DIGIT + '._!-'
2929
UNRESERVED_CHARS_SET = set(UNRESERVED_CHARS)
30-
NON_PCT_ENCODED_SET = RESERVED_CHARS_SET.union(UNRESERVED_CHARS_SET).union('%')
30+
NON_PCT_ENCODED_SET = RESERVED_CHARS_SET.union(UNRESERVED_CHARS_SET)
3131
# We need to escape the '-' in this case:
32-
UNRESERVED_RE = 'A-Za-z0-9._~\-'
32+
UNRESERVED_RE = r'A-Za-z0-9._~\-'
3333

3434
# Percent encoded character values
3535
PERCENT_ENCODED = PCT_ENCODED = '%[A-Fa-f0-9]{2}'
@@ -59,9 +59,9 @@
5959
# modified to ignore other matches that are not important to the parsing of
6060
# the reference so we can also simply use SRE_Match#groups.
6161
URL_PARSING_RE = (
62-
'(?:(?P<scheme>{scheme}):)?(?://(?P<authority>{authority}))?'
63-
'(?P<path>{path})(?:\?(?P<query>{query}))?'
64-
'(?:#(?P<fragment>{fragment}))?'
62+
r'(?:(?P<scheme>{scheme}):)?(?://(?P<authority>{authority}))?'
63+
r'(?P<path>{path})(?:\?(?P<query>{query}))?'
64+
r'(?:#(?P<fragment>{fragment}))?'
6565
).format(**COMPONENT_PATTERN_DICT)
6666

6767

@@ -120,7 +120,7 @@
120120
ZONE_ID = '(?:[' + UNRESERVED_RE + ']|' + PCT_ENCODED + ')+'
121121
IPv6_ADDRZ_RE = IPv6_RE + '%25' + ZONE_ID
122122

123-
IP_LITERAL_RE = '\[({0}|(?:{1})|{2})\]'.format(
123+
IP_LITERAL_RE = r'\[({0}|(?:{1})|{2})\]'.format(
124124
IPv6_RE,
125125
IPv6_ADDRZ_RE,
126126
IPv_FUTURE_RE,

src/rfc3986/misc.py

Lines changed: 2 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -75,13 +75,13 @@
7575
# Scheme validation, see: http://tools.ietf.org/html/rfc3986#section-3.1
7676
SCHEME_MATCHER = re.compile('^{0}$'.format(abnf_regexp.SCHEME_RE))
7777

78-
RELATIVE_REF_MATCHER = re.compile('^%s(\?%s)?(#%s)?$' % (
78+
RELATIVE_REF_MATCHER = re.compile(r'^%s(\?%s)?(#%s)?$' % (
7979
abnf_regexp.RELATIVE_PART_RE, abnf_regexp.QUERY_RE,
8080
abnf_regexp.FRAGMENT_RE,
8181
))
8282

8383
# See http://tools.ietf.org/html/rfc3986#section-4.3
84-
ABSOLUTE_URI_MATCHER = re.compile('^%s:%s(\?%s)?$' % (
84+
ABSOLUTE_URI_MATCHER = re.compile(r'^%s:%s(\?%s)?$' % (
8585
abnf_regexp.COMPONENT_PATTERN_DICT['scheme'],
8686
abnf_regexp.HIER_PART_RE,
8787
abnf_regexp.QUERY_RE[1:-1],

src/rfc3986/normalizers.py

Lines changed: 9 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -129,15 +129,23 @@ def encode_component(uri_component, encoding):
129129
if uri_component is None:
130130
return uri_component
131131

132+
# Try to see if the component we're encoding is already percent-encoded
133+
# so we can skip all '%' characters but still encode all others.
134+
percent_encodings = len(PERCENT_MATCHER.findall(
135+
compat.to_str(uri_component, encoding)))
136+
132137
uri_bytes = compat.to_bytes(uri_component, encoding)
138+
is_percent_encoded = (percent_encodings > 0
139+
and percent_encodings == uri_bytes.count(b'%'))
133140

134141
encoded_uri = bytearray()
135142

136143
for i in range(0, len(uri_bytes)):
137144
# Will return a single character bytestring on both Python 2 & 3
138145
byte = uri_bytes[i:i+1]
139146
byte_ord = ord(byte)
140-
if byte_ord < 128 and byte.decode() in misc.NON_PCT_ENCODED:
147+
if ((is_percent_encoded and byte == b'%')
148+
or (byte_ord < 128 and byte.decode() in misc.NON_PCT_ENCODED)):
141149
encoded_uri.extend(byte)
142150
continue
143151
encoded_uri.extend('%{0:02x}'.format(byte_ord).encode())

tests/base.py

Lines changed: 14 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -119,6 +119,20 @@ def test_handles_relative_uri(self, relative_uri):
119119
assert uri.scheme is None
120120
assert uri.authority == relative_uri[2:]
121121

122+
def test_handles_percent_in_path(self, uri_path_with_percent):
123+
"""Test that self.test_class encodes the % character properly."""
124+
uri = self.test_class.from_string(uri_path_with_percent)
125+
print(uri.path)
126+
assert uri.path == '/%25%20'
127+
128+
def test_handles_percent_in_query(self, uri_query_with_percent):
129+
uri = self.test_class.from_string(uri_query_with_percent)
130+
assert uri.query == 'a=%25'
131+
132+
def test_handles_percent_in_fragment(self, uri_fragment_with_percent):
133+
uri = self.test_class.from_string(uri_fragment_with_percent)
134+
assert uri.fragment == 'perc%25ent'
135+
122136

123137
class BaseTestUnsplits:
124138
test_class = None

tests/conftest.py

Lines changed: 16 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -116,4 +116,20 @@ def absolute_path_uri():
116116
def invalid_uri(request):
117117
return 'https://%s' % request.param
118118

119+
120+
@pytest.fixture(params=valid_hosts)
121+
def uri_path_with_percent(request):
122+
return 'https://%s/%% ' % request.param
123+
124+
125+
@pytest.fixture(params=valid_hosts)
126+
def uri_query_with_percent(request):
127+
return 'https://%s?a=%%' % request.param
128+
129+
130+
@pytest.fixture(params=valid_hosts)
131+
def uri_fragment_with_percent(request):
132+
return 'https://%s#perc%%ent' % request.param
133+
134+
119135
sys.path.insert(0, '.')

tests/test_builder.py

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -135,6 +135,7 @@ def test_add_path(path):
135135
([('a', 'b+c')], 'a=b%2Bc'),
136136
([('a', 'b'), ('c', 'd')], 'a=b&c=d'),
137137
([('a', 'b'), ('username', '@d')], 'a=b&username=%40d'),
138+
([('percent', '%')], 'percent=%25'),
138139
])
139140
def test_add_query_from(query_items, expected):
140141
"""Verify the behaviour of add_query_from."""

0 commit comments

Comments
 (0)