Skip to content

Commit 4938244

Browse files
[PATCH] Apply some changes as per review (- WIP PR #373 -)
* This work is related to GHI #213

Changes in file docs/utils.py:
* added documentation for new utilities.
* added type-hints (- WIP #134 -)
* related work
1 parent 93521bf commit 4938244

File tree

1 file changed

+42
-9
lines changed

1 file changed

+42
-9
lines changed

docs/utils.py

Lines changed: 42 additions & 9 deletions
Original file line number | Diff line number | Diff line change
@@ -35,8 +35,8 @@
3535

3636
# URL allowed domain list
3737
# Enforces:
38-
# - URLs Must belone to one of these domains
39-
URL_ALLOWED_NETLOCS = {"github.com", "readthedocs.com"}
38+
# - URLs Must belong to one of these domains
39+
URL_ALLOWED_NETLOCS = {"github.com", "readthedocs.com", "docs.python.org"}
4040

4141

4242
def _validate_git_ref(ref: str) -> str:
@@ -141,8 +141,32 @@ def slugify_header(s: str) -> str:
141141
return re.sub(r'[-\s]+', "-", text)
142142

143143

144-
def sanitize_url(url):
145-
"""ADD DOCS.
144+
def sanitize_url(url: str) -> str:
145+
"""
146+
Sanitize and validate a URL according to allowed schemes and domains.
147+
148+
This function validates that the URL uses an allowed scheme (https) and points
149+
to a trusted domain, then safely encodes its path and query components.
150+
151+
Args:
152+
url (str) -- The URL to sanitize.
153+
154+
Returns:
155+
str -- The sanitized URL.
156+
157+
Raises:
158+
ValueError -- If the URL has an invalid scheme or points to an untrusted domain.
159+
160+
161+
Unit-Testing:
162+
163+
Testcase 1: Basic URL with spaces and special characters.
164+
165+
>>> url_fxtr = "https://github.com/user/Hello World!"
166+
>>> utils.sanitize_url(url_fxtr)
167+
'https://github.com/user/Hello%20World%21'
168+
>>>
169+
146170
"""
147171
parsed_url = urlparse(url)
148172
# Validate scheme
@@ -155,18 +179,27 @@ def sanitize_url(url):
155179
sanitized_path = quote(parsed_url.path)
156180
sanitized_query = quote(parsed_url.query)
157181
# Reconstruct the sanitized URL
158-
sanitized_url = urlunparse((
182+
return urlunparse((
159183
parsed_url.scheme,
160184
parsed_url.netloc,
161185
sanitized_path,
162186
parsed_url.params,
163187
sanitized_query,
164-
parsed_url.fragment
188+
parsed_url.fragment,
165189
))
166-
return sanitized_url
167190

168191

169-
def sanitize_intersphinx_mapping(mapping):
170-
"""ADD DOCS.
192+
def sanitize_intersphinx_mapping(mapping: dict) -> dict:
193+
"""
194+
Sanitize URLs in an intersphinx mapping dictionary.
195+
196+
This function applies URL sanitization to each URL in the mapping while
197+
preserving the associated extra values.
198+
199+
Args:
200+
mapping (dict) -- A dictionary mapping names to tuples of (url, extra_value).
201+
202+
Returns:
203+
dict -- A dictionary with the same structure but with sanitized URLs.
171204
"""
172205
return {key: (sanitize_url(url), extra_value) for key, (url, extra_value) in mapping.items()}

0 commit comments

Comments
 (0)