Skip to content

Commit a94feb8

Browse files
committed
INTPYTHON-732 Don't parse the query string args
Instead, pass them to HOST
1 parent f597d82 commit a94feb8

File tree

2 files changed

+159
-42
lines changed

2 files changed

+159
-42
lines changed

django_mongodb_backend/utils.py

Lines changed: 69 additions & 23 deletions
Original file line numberDiff line numberDiff line change
@@ -1,5 +1,6 @@
11
import copy
22
import time
3+
from urllib.parse import parse_qsl, quote, urlsplit
34

45
import django
56
from django.conf import settings
@@ -31,39 +32,84 @@ def check_django_compatability():
3132
def parse_uri(uri, *, db_name=None, options=None, test=None):
3233
"""
3334
Convert the given uri into a dictionary suitable for Django's DATABASES
34-
setting.
35+
setting. Keep query string args on HOST (not in OPTIONS).
36+
37+
Behavior:
38+
- Non-SRV: HOST = "<host[,host2:port2]><?query>", no scheme/path.
39+
- SRV: HOST = "mongodb+srv://<fqdn><?query>", no path.
40+
- NAME is db_name if provided else the db in the URI path (required).
41+
- If the URI has a db path and no authSource in the query, append it.
42+
- options kwarg merges by appending to the HOST query (last-one-wins for
43+
single-valued options), without re-encoding existing query content.
44+
- PORT is set only for single-host URIs; multi-host and SRV => PORT=None.
3545
"""
36-
uri = pymongo_parse_uri(uri)
37-
host = None
38-
port = None
39-
if uri["fqdn"]:
40-
# This is a SRV URI and the host is the fqdn.
41-
host = f"mongodb+srv://{uri['fqdn']}"
46+
parsed = pymongo_parse_uri(uri)
47+
split = urlsplit(uri)
48+
49+
# Keep the original query string verbatim to avoid breaking special
50+
# options like readPreferenceTags.
51+
query_str = split.query or ""
52+
53+
# Determine NAME; must come from db_name or the URI path.
54+
db = db_name or parsed.get("database")
55+
if not db:
56+
raise ImproperlyConfigured("You must provide the db_name parameter.")
57+
58+
# Helper: check if a key is present in the existing query (case-sensitive).
59+
def query_has_key(key: str) -> bool:
60+
return any(k == key for k, _ in parse_qsl(query_str, keep_blank_values=True))
61+
62+
# If URI path had a database and no authSource is present, append it.
63+
if parsed.get("database") and not query_has_key("authSource"):
64+
suffix = f"authSource={quote(parsed['database'], safe='')}"
65+
query_str = f"{query_str}&{suffix}" if query_str else suffix
66+
67+
# Merge options by appending them (so "last one wins" for single-valued opts).
68+
if options:
69+
for k, v in options.items():
70+
# Convert value to string as expected in URIs.
71+
v_str = ("true" if v else "false") if isinstance(v, bool) else str(v)
72+
# Preserve ':' and ',' unescaped (important for readPreferenceTags).
73+
v_enc = quote(v_str, safe=":,")
74+
pair = f"{k}={v_enc}"
75+
query_str = f"{query_str}&{pair}" if query_str else pair
76+
77+
# Build HOST (and PORT) based on SRV vs. standard.
78+
if parsed.get("fqdn"): # SRV URI
79+
host_base = f"mongodb+srv://{parsed['fqdn']}"
80+
port = None
4281
else:
43-
nodelist = uri.get("nodelist")
82+
nodelist = parsed.get("nodelist") or []
4483
if len(nodelist) == 1:
45-
host, port = nodelist[0]
84+
h, p = nodelist[0]
85+
host_base = h
86+
port = p
4687
elif len(nodelist) > 1:
47-
host = ",".join([f"{host}:{port}" for host, port in nodelist])
48-
db_name = db_name or uri["database"]
49-
if not db_name:
50-
raise ImproperlyConfigured("You must provide the db_name parameter.")
51-
opts = uri.get("options")
52-
if options:
53-
opts.update(options)
88+
# Ensure explicit ports for each host (default 27017 if missing).
89+
parts = [f"{h}:{(p if p is not None else 27017)}" for h, p in nodelist]
90+
host_base = ",".join(parts)
91+
port = None
92+
else:
93+
# Fallback for unusual/invalid URIs.
94+
host_base = split.netloc.split("@")[-1]
95+
port = None
96+
97+
host_with_query = f"{host_base}?{query_str}" if query_str else host_base
98+
5499
settings_dict = {
55100
"ENGINE": "django_mongodb_backend",
56-
"NAME": db_name,
57-
"HOST": host,
101+
"NAME": db,
102+
"HOST": host_with_query,
58103
"PORT": port,
59-
"USER": uri.get("username"),
60-
"PASSWORD": uri.get("password"),
61-
"OPTIONS": opts,
104+
"USER": parsed.get("username"),
105+
"PASSWORD": parsed.get("password"),
106+
# Options remain empty; all query args live in HOST.
107+
"OPTIONS": {},
62108
}
63-
if "authSource" not in settings_dict["OPTIONS"] and uri["database"]:
64-
settings_dict["OPTIONS"]["authSource"] = uri["database"]
109+
65110
if test:
66111
settings_dict["TEST"] = test
112+
67113
return settings_dict
68114

69115

tests/backend_/utils/test_parse_uri.py

Lines changed: 90 additions & 19 deletions
Original file line numberDiff line numberDiff line change
@@ -12,14 +12,18 @@ def test_simple_uri(self):
1212
settings_dict = parse_uri("mongodb://cluster0.example.mongodb.net/myDatabase")
1313
self.assertEqual(settings_dict["ENGINE"], "django_mongodb_backend")
1414
self.assertEqual(settings_dict["NAME"], "myDatabase")
15-
self.assertEqual(settings_dict["HOST"], "cluster0.example.mongodb.net")
16-
self.assertEqual(settings_dict["OPTIONS"], {"authSource": "myDatabase"})
15+
# Default authSource derived from URI path db is appended to HOST
16+
self.assertEqual(
17+
settings_dict["HOST"], "cluster0.example.mongodb.net?authSource=myDatabase"
18+
)
19+
self.assertEqual(settings_dict["OPTIONS"], {})
1720

1821
def test_db_name(self):
1922
settings_dict = parse_uri("mongodb://cluster0.example.mongodb.net/", db_name="myDatabase")
2023
self.assertEqual(settings_dict["ENGINE"], "django_mongodb_backend")
2124
self.assertEqual(settings_dict["NAME"], "myDatabase")
2225
self.assertEqual(settings_dict["HOST"], "cluster0.example.mongodb.net")
26+
# No default authSource injected when the URI has no database path
2327
self.assertEqual(settings_dict["OPTIONS"], {})
2428

2529
def test_db_name_overrides_default_auth_db(self):
@@ -28,8 +32,11 @@ def test_db_name_overrides_default_auth_db(self):
2832
)
2933
self.assertEqual(settings_dict["ENGINE"], "django_mongodb_backend")
3034
self.assertEqual(settings_dict["NAME"], "myDatabase")
31-
self.assertEqual(settings_dict["HOST"], "cluster0.example.mongodb.net")
32-
self.assertEqual(settings_dict["OPTIONS"], {"authSource": "default_auth_db"})
35+
# authSource defaults to the database from the URI, not db_name
36+
self.assertEqual(
37+
settings_dict["HOST"], "cluster0.example.mongodb.net?authSource=default_auth_db"
38+
)
39+
self.assertEqual(settings_dict["OPTIONS"], {})
3340

3441
def test_no_database(self):
3542
msg = "You must provide the db_name parameter."
@@ -43,55 +50,71 @@ def test_srv_uri_with_options(self):
4350
with patch("dns.resolver.resolve"):
4451
settings_dict = parse_uri(uri)
4552
self.assertEqual(settings_dict["NAME"], "my_database")
46-
self.assertEqual(settings_dict["HOST"], "mongodb+srv://cluster0.example.mongodb.net")
53+
# HOST includes scheme + fqdn only (no path), with query
54+
# preserved and default authSource appended
55+
self.assertTrue(
56+
settings_dict["HOST"].startswith("mongodb+srv://cluster0.example.mongodb.net?")
57+
)
58+
self.assertIn("retryWrites=true", settings_dict["HOST"])
59+
self.assertIn("w=majority", settings_dict["HOST"])
60+
self.assertIn("authSource=my_database", settings_dict["HOST"])
4761
self.assertEqual(settings_dict["USER"], "my_user")
4862
self.assertEqual(settings_dict["PASSWORD"], "my_password")
4963
self.assertIsNone(settings_dict["PORT"])
50-
self.assertEqual(
51-
settings_dict["OPTIONS"],
52-
{"authSource": "my_database", "retryWrites": True, "w": "majority", "tls": True},
53-
)
64+
# No options copied into OPTIONS; they live in HOST query
65+
self.assertEqual(settings_dict["OPTIONS"], {})
5466

5567
def test_localhost(self):
5668
settings_dict = parse_uri("mongodb://localhost/db")
57-
self.assertEqual(settings_dict["HOST"], "localhost")
69+
# Default authSource appended to HOST
70+
self.assertEqual(settings_dict["HOST"], "localhost?authSource=db")
5871
self.assertEqual(settings_dict["PORT"], 27017)
5972

6073
def test_localhost_with_port(self):
6174
settings_dict = parse_uri("mongodb://localhost:27018/db")
62-
self.assertEqual(settings_dict["HOST"], "localhost")
75+
# HOST omits the path and port, keeps only host + query
76+
self.assertEqual(settings_dict["HOST"], "localhost?authSource=db")
6377
self.assertEqual(settings_dict["PORT"], 27018)
6478

6579
def test_hosts_with_ports(self):
6680
settings_dict = parse_uri("mongodb://localhost:27017,localhost:27018/db")
67-
self.assertEqual(settings_dict["HOST"], "localhost:27017,localhost:27018")
81+
# For multi-host, PORT is None and HOST carries the full host list plus query
82+
self.assertEqual(settings_dict["HOST"], "localhost:27017,localhost:27018?authSource=db")
6883
self.assertEqual(settings_dict["PORT"], None)
6984

7085
def test_hosts_without_ports(self):
7186
settings_dict = parse_uri("mongodb://host1.net,host2.net/db")
72-
self.assertEqual(settings_dict["HOST"], "host1.net:27017,host2.net:27017")
87+
# Default ports are added to each host in HOST, plus the query
88+
self.assertEqual(settings_dict["HOST"], "host1.net:27017,host2.net:27017?authSource=db")
7389
self.assertEqual(settings_dict["PORT"], None)
7490

7591
def test_auth_source_in_query_string(self):
7692
settings_dict = parse_uri("mongodb://localhost/?authSource=auth", db_name="db")
7793
self.assertEqual(settings_dict["NAME"], "db")
78-
self.assertEqual(settings_dict["OPTIONS"], {"authSource": "auth"})
94+
# Keep original query intact in HOST; do not duplicate into OPTIONS
95+
self.assertEqual(settings_dict["HOST"], "localhost?authSource=auth")
96+
self.assertEqual(settings_dict["OPTIONS"], {})
7997

8098
def test_auth_source_in_query_string_overrides_defaultauthdb(self):
8199
settings_dict = parse_uri("mongodb://localhost/db?authSource=auth")
82100
self.assertEqual(settings_dict["NAME"], "db")
83-
self.assertEqual(settings_dict["OPTIONS"], {"authSource": "auth"})
101+
# Query-provided authSource overrides default; kept in HOST only
102+
self.assertEqual(settings_dict["HOST"], "localhost?authSource=auth")
103+
self.assertEqual(settings_dict["OPTIONS"], {})
84104

85105
def test_options_kwarg(self):
86106
options = {"authSource": "auth", "retryWrites": True}
87107
settings_dict = parse_uri(
88108
"mongodb://cluster0.example.mongodb.net/myDatabase?retryWrites=false&retryReads=true",
89109
options=options,
90110
)
91-
self.assertEqual(
92-
settings_dict["OPTIONS"],
93-
{"authSource": "auth", "retryWrites": True, "retryReads": True},
94-
)
111+
# options kwarg overrides same-key query params; query-only keys are kept.
112+
# All options live in HOST's query string; OPTIONS is empty.
113+
self.assertTrue(settings_dict["HOST"].startswith("cluster0.example.mongodb.net?"))
114+
self.assertIn("authSource=auth", settings_dict["HOST"])
115+
self.assertIn("retryWrites=true", settings_dict["HOST"]) # overridden
116+
self.assertIn("retryReads=true", settings_dict["HOST"]) # preserved
117+
self.assertEqual(settings_dict["OPTIONS"], {})
95118

96119
def test_test_kwarg(self):
97120
settings_dict = parse_uri("mongodb://localhost/db", test={"NAME": "test_db"})
@@ -105,3 +128,51 @@ def test_invalid_credentials(self):
105128
def test_no_scheme(self):
106129
with self.assertRaisesMessage(pymongo.errors.InvalidURI, "Invalid URI scheme"):
107130
parse_uri("cluster0.example.mongodb.net")
131+
132+
def test_read_preference_tags_in_host_query_allows_mongoclient_construction(self):
133+
"""
134+
Ensure readPreferenceTags preserved in the HOST query string can be parsed by
135+
MongoClient without raising validation errors, and result in correct tag sets.
136+
This verifies we no longer rely on pymongo's normalized options dict for tags.
137+
"""
138+
uri = (
139+
"mongodb://localhost/"
140+
"?readPreference=secondary"
141+
"&readPreferenceTags=dc:ny,other:sf"
142+
"&readPreferenceTags=dc:2,other:1"
143+
)
144+
145+
# Baseline: demonstrate why relying on parsed options can be problematic.
146+
parsed = pymongo.uri_parser.parse_uri(uri)
147+
# Some PyMongo versions normalize this into a dict (invalid as a kwarg), others into a list.
148+
# If it's a dict, passing it as a kwarg will raise a ValueError as shown in the issue.
149+
# We only assert no crash in our new path below; this is informational.
150+
if isinstance(parsed["options"].get("readPreferenceTags"), dict):
151+
with self.assertRaises(ValueError):
152+
pymongo.MongoClient(readPreferenceTags=parsed["options"]["readPreferenceTags"])
153+
154+
# New behavior: keep the raw query on HOST, not in OPTIONS.
155+
settings_dict = parse_uri(uri, db_name="db")
156+
host_with_query = settings_dict["HOST"]
157+
# Compose a full URI for MongoClient (non-SRV -> prepend scheme
158+
# and ensure "/?" before query)
159+
if host_with_query.startswith("mongodb+srv://"):
160+
full_uri = host_with_query # SRV already includes scheme
161+
else:
162+
if "?" in host_with_query:
163+
base, q = host_with_query.split("?", 1)
164+
full_uri = f"mongodb://{base}/?{q}"
165+
else:
166+
full_uri = f"mongodb://{host_with_query}/"
167+
168+
# Constructing MongoClient should not raise, and should reflect the read preference + tags.
169+
client = pymongo.MongoClient(full_uri, serverSelectionTimeoutMS=1)
170+
try:
171+
doc = client.read_preference.document
172+
self.assertEqual(doc.get("mode"), "secondary")
173+
self.assertEqual(
174+
doc.get("tags"),
175+
[{"dc": "ny", "other": "sf"}, {"dc": "2", "other": "1"}],
176+
)
177+
finally:
178+
client.close()

0 commit comments

Comments
 (0)