Skip to content

Commit 11e6f98

Browse files
authored
PYTHON-1579 Update URI parser to adhere to new connection string spec (#755)
1 parent d77c204 commit 11e6f98

14 files changed

+173
-35
lines changed

.evergreen/config.yml

Lines changed: 2 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -523,7 +523,7 @@ functions:
523523
silent: true
524524
script: |
525525
cat <<'EOF' > "${PROJECT_DIRECTORY}/prepare_mongodb_aws.sh"
526-
alias urlencode='${python3_binary} -c "import sys, urllib.parse as ulp; sys.stdout.write(ulp.quote_plus(sys.argv[1]))"'
526+
alias urlencode='${python3_binary} -c "import sys, urllib.parse as ulp; sys.stdout.write(ulp.quote(sys.argv[1]))"'
527527
USER=$(urlencode ${iam_auth_ecs_account})
528528
PASS=$(urlencode ${iam_auth_ecs_secret_access_key})
529529
MONGODB_URI="mongodb://$USER:$PASS@localhost"
@@ -554,7 +554,7 @@ functions:
554554
script: |
555555
# DO NOT ECHO WITH XTRACE (which PREPARE_SHELL does)
556556
cat <<'EOF' > "${PROJECT_DIRECTORY}/prepare_mongodb_aws.sh"
557-
alias urlencode='${python3_binary} -c "import sys, urllib.parse as ulp; sys.stdout.write(ulp.quote_plus(sys.argv[1]))"'
557+
alias urlencode='${python3_binary} -c "import sys, urllib.parse as ulp; sys.stdout.write(ulp.quote(sys.argv[1]))"'
558558
alias jsonkey='${python3_binary} -c "import json,sys;sys.stdout.write(json.load(sys.stdin)[sys.argv[1]])" < ${DRIVERS_TOOLS}/.evergreen/auth_aws/creds.json'
559559
USER=$(jsonkey AccessKeyId)
560560
USER=$(urlencode $USER)

doc/changelog.rst

Lines changed: 12 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -160,8 +160,19 @@ Breaking Changes in 4.0
160160
are passed to the server as-is rather than the previous behavior which
161161
substituted in a projection of ``{"_id": 1}``. This means that an empty
162162
projection will now return the entire document, not just the ``"_id"`` field.
163-
- ``MongoClient()`` now raises a :exc:`~pymongo.errors.ConfigurationError`
163+
- :class:`~pymongo.mongo_client.MongoClient` now raises a :exc:`~pymongo.errors.ConfigurationError`
164164
when more than one URI is passed into the ``hosts`` argument.
165+
- :class:`~pymongo.mongo_client.MongoClient` now raises an
166+
:exc:`~pymongo.errors.InvalidURI` exception
167+
when it encounters unescaped percent signs in username and password when
168+
parsing MongoDB URIs.
169+
- :class:`~pymongo.mongo_client.MongoClient` now uses
170+
:py:func:`urllib.parse.unquote` rather than
171+
:py:func:`urllib.parse.unquote_plus`,
172+
meaning that plus signs ("+") are no longer converted to spaces (" "). This
173+
means that if you were previously quoting your login information using
174+
quote_plus, you must now switch to quote. Additionally, be aware that this
175+
change only occurs when parsing login information from the URI.
165176

166177
Notable improvements
167178
....................

doc/examples/authentication.rst

Lines changed: 3 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -11,14 +11,14 @@ Percent-Escaping Username and Password
1111
--------------------------------------
1212

1313
Username and password must be percent-escaped with
14-
:meth:`urllib.parse.quote_plus`, to be used in a MongoDB URI. For example::
14+
:py:func:`urllib.parse.quote`, to be used in a MongoDB URI. For example::
1515

1616
>>> from pymongo import MongoClient
1717
>>> import urllib.parse
18-
>>> username = urllib.parse.quote_plus('user')
18+
>>> username = urllib.parse.quote('user')
1919
>>> username
2020
'user'
21-
>>> password = urllib.parse.quote_plus('pass/word')
21+
>>> password = urllib.parse.quote('pass/word')
2222
>>> password
2323
'pass%2Fword'
2424
>>> MongoClient('mongodb://%s:%s@127.0.0.1' % (username, password))

doc/migrate-to-pymongo4.rst

Lines changed: 18 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -190,9 +190,26 @@ now you must create a new instance.
190190
MongoClient raises exception when given more than one URI
191191
.........................................................
192192

193-
``MongoClient()`` now raises a :exc:`~pymongo.errors.ConfigurationError`
193+
:class:`~pymongo.mongo_client.MongoClient` now raises a :exc:`~pymongo.errors.ConfigurationError`
194194
when more than one URI is passed into the ``hosts`` argument.
195195

196+
MongoClient raises exception when given unescaped percent sign in login info
197+
............................................................................
198+
199+
:class:`~pymongo.mongo_client.MongoClient` now raises an
200+
:exc:`~pymongo.errors.InvalidURI` exception
201+
when it encounters unescaped percent signs in username and password.
202+
203+
MongoClient uses `unquote` rather than `unquote_plus` for login info
204+
....................................................................
205+
206+
:class:`~pymongo.mongo_client.MongoClient` now uses
207+
:py:func:`urllib.parse.unquote` rather than
208+
:py:func:`urllib.parse.unquote_plus`, meaning that plus signs ("+") are no
209+
longer converted to spaces (" "). This means that if you were previously
210+
quoting your login information using :py:func:`urllib.parse.quote_plus`, you
211+
must now switch to :py:func:`urllib.parse.quote`.
212+
196213
Database
197214
--------
198215

pymongo/auth.py

Lines changed: 0 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -319,9 +319,6 @@ def _authenticate_gssapi(credentials, sock_info):
319319

320320
if password is not None:
321321
if _USE_PRINCIPAL:
322-
# Note that, though we use unquote_plus for unquoting URI
323-
# options, we use quote here. Microsoft's UrlUnescape (used
324-
# by WinKerberos) doesn't support +.
325322
principal = ":".join((quote(username), quote(password)))
326323
result, ctx = kerberos.authGSSClientInit(
327324
service, principal, gssflags=kerberos.GSS_C_MUTUAL_FLAG)

pymongo/mongo_client.py

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -329,7 +329,7 @@ def __init__(
329329
a Unicode-related error occurs during BSON decoding that would
330330
otherwise raise :exc:`UnicodeDecodeError`. Valid options include
331331
'strict', 'replace', and 'ignore'. Defaults to 'strict'.
332-
- ``srvServiceName`: (string) The SRV service name to use for
332+
- `srvServiceName`: (string) The SRV service name to use for
333333
"mongodb+srv://" URIs. Defaults to "mongodb". Use it like so::
334334
335335
MongoClient("mongodb+srv://example.com/?srvServiceName=customname")

pymongo/uri_parser.py

Lines changed: 31 additions & 6 deletions
Original file line numberDiff line numberDiff line change
@@ -18,7 +18,7 @@
1818
import warnings
1919
import sys
2020

21-
from urllib.parse import unquote_plus
21+
from urllib.parse import unquote, unquote_plus
2222

2323
from pymongo.common import (
2424
SRV_SERVICE_NAME,
@@ -35,25 +35,44 @@
3535
DEFAULT_PORT = 27017
3636

3737

38+
def _unquoted_percent(s):
39+
"""Check for unescaped percent signs.
40+
41+
:Parameters:
42+
- `s`: A string. `s` can have things like '%25', '%2525',
43+
and '%E2%85%A8' but cannot have unquoted percent like '%foo'.
44+
"""
45+
for i in range(len(s)):
46+
if s[i] == '%':
47+
sub = s[i:i+3]
48+
# If unquoting yields the same string this means there was an
49+
# unquoted %.
50+
if unquote(sub) == sub:
51+
return True
52+
return False
53+
3854
def parse_userinfo(userinfo):
3955
"""Validates the format of user information in a MongoDB URI.
40-
Reserved characters like ':', '/', '+' and '@' must be escaped
41-
following RFC 3986.
56+
Reserved characters that are gen-delimiters (":", "/", "?", "#", "[",
57+
"]", "@") as per RFC 3986 must be escaped.
4258
4359
Returns a 2-tuple containing the unescaped username followed
4460
by the unescaped password.
4561
4662
:Parameters:
4763
- `userinfo`: A string of the form <username>:<password>
4864
"""
49-
if '@' in userinfo or userinfo.count(':') > 1:
65+
if ('@' in userinfo or userinfo.count(':') > 1 or
66+
_unquoted_percent(userinfo)):
5067
raise InvalidURI("Username and password must be escaped according to "
51-
"RFC 3986, use urllib.parse.quote_plus")
68+
"RFC 3986, use urllib.parse.quote")
69+
5270
user, _, passwd = userinfo.partition(":")
5371
# No password is expected with GSSAPI authentication.
5472
if not user:
5573
raise InvalidURI("The empty string is not valid username.")
56-
return unquote_plus(user), unquote_plus(passwd)
74+
75+
return unquote(user), unquote(passwd)
5776

5877

5978
def parse_ipv6_literal_host(entity, default_port):
@@ -408,6 +427,12 @@ def parse_uri(uri, default_port=DEFAULT_PORT, validate=True, warn=False,
408427
wait for a response from the DNS server.
409428
- `srv_service_name` (optional): A custom SRV service name
410429
430+
.. versionchanged:: 4.0
431+
To better follow RFC 3986, unquoted percent signs ("%") are no longer
432+
supported and plus signs ("+") are no longer decoded into spaces (" ")
433+
when decoding username and password. To avoid these issues, use
434+
:py:func:`urllib.parse.quote` when building the URI.
435+
411436
.. versionchanged:: 3.9
412437
Added the ``normalize`` parameter.
413438

test/connection_string/test/invalid-uris.json

Lines changed: 45 additions & 9 deletions
Original file line numberDiff line numberDiff line change
@@ -189,15 +189,6 @@
189189
"auth": null,
190190
"options": null
191191
},
192-
{
193-
"description": "Username with password containing an unescaped colon",
194-
"uri": "mongodb://alice:foo:bar@127.0.0.1",
195-
"valid": false,
196-
"warning": null,
197-
"hosts": null,
198-
"auth": null,
199-
"options": null
200-
},
201192
{
202193
"description": "Username containing an unescaped at-sign",
203194
"uri": "mongodb://alice@@127.0.0.1",
@@ -251,6 +242,51 @@
251242
"hosts": null,
252243
"auth": null,
253244
"options": null
245+
},
246+
{
247+
"description": "mongodb+srv with multiple service names",
248+
"uri": "mongodb+srv://test5.test.mongodb.com,test6.test.mongodb.com",
249+
"valid": false,
250+
"warning": null,
251+
"hosts": null,
252+
"auth": null,
253+
"options": null
254+
},
255+
{
256+
"description": "mongodb+srv with port number",
257+
"uri": "mongodb+srv://test7.test.mongodb.com:27018",
258+
"valid": false,
259+
"warning": null,
260+
"hosts": null,
261+
"auth": null,
262+
"options": null
263+
},
264+
{
265+
"description": "Username with password containing an unescaped percent sign",
266+
"uri": "mongodb://alice%foo:bar@127.0.0.1",
267+
"valid": false,
268+
"warning": null,
269+
"hosts": null,
270+
"auth": null,
271+
"options": null
272+
},
273+
{
274+
"description": "Username with password containing an unescaped percent sign and an escaped one",
275+
"uri": "mongodb://user%20%:password@localhost",
276+
"valid": false,
277+
"warning": null,
278+
"hosts": null,
279+
"auth": null,
280+
"options": null
281+
},
282+
{
283+
"description": "Username with password containing an unescaped percent sign (non hex digit)",
284+
"uri": "mongodb://user%w:password@localhost",
285+
"valid": false,
286+
"warning": null,
287+
"hosts": null,
288+
"auth": null,
289+
"options": null
254290
}
255291
]
256292
}

test/connection_string/test/valid-auth.json

Lines changed: 21 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -240,6 +240,27 @@
240240
"authmechanism": "MONGODB-CR"
241241
}
242242
},
243+
{
244+
"description": "Subdelimiters in user/pass don't need escaping (MONGODB-CR)",
245+
"uri": "mongodb://!$&'()*+,;=:!$&'()*+,;=@127.0.0.1/admin?authMechanism=MONGODB-CR",
246+
"valid": true,
247+
"warning": false,
248+
"hosts": [
249+
{
250+
"type": "ipv4",
251+
"host": "127.0.0.1",
252+
"port": null
253+
}
254+
],
255+
"auth": {
256+
"username": "!$&'()*+,;=",
257+
"password": "!$&'()*+,;=",
258+
"db": "admin"
259+
},
260+
"options": {
261+
"authmechanism": "MONGODB-CR"
262+
}
263+
},
243264
{
244265
"description": "Escaped username (MONGODB-X509)",
245266
"uri": "mongodb://CN%3DmyName%2COU%3DmyOrgUnit%2CO%3DmyOrg%2CL%3DmyLocality%2CST%3DmyState%2CC%3DmyCountry@localhost/?authMechanism=MONGODB-X509",

test/connection_string/test/valid-host_identifiers.json

Lines changed: 3 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -132,18 +132,18 @@
132132
},
133133
{
134134
"description": "UTF-8 hosts",
135-
"uri": "mongodb://b\u00fccher.example.com,uml\u00e4ut.example.com/",
135+
"uri": "mongodb://bücher.example.com,umläut.example.com/",
136136
"valid": true,
137137
"warning": false,
138138
"hosts": [
139139
{
140140
"type": "hostname",
141-
"host": "b\u00fccher.example.com",
141+
"host": "bücher.example.com",
142142
"port": null
143143
},
144144
{
145145
"type": "hostname",
146-
"host": "uml\u00e4ut.example.com",
146+
"host": "umläut.example.com",
147147
"port": null
148148
}
149149
],

0 commit comments

Comments
 (0)