3535
3636# URL allowed domain list
3737# Enforces:
38- # - URLs Must belone to one of these domains
39- URL_ALLOWED_NETLOCS = {"github.com" , "readthedocs.com" }
38+ # - URLs must belong to one of these domains
39+ URL_ALLOWED_NETLOCS = {"github.com" , "readthedocs.com" , "docs.python.org" }
4040
4141
4242def _validate_git_ref (ref : str ) -> str :
@@ -141,8 +141,32 @@ def slugify_header(s: str) -> str:
141141 return re .sub (r'[-\s]+' , "-" , text )
142142
143143
144- def sanitize_url (url ):
145- """ADD DOCS.
144+ def sanitize_url (url : str ) -> str :
145+ """
146+ Sanitize and validate a URL according to allowed schemes and domains.
147+
148+ This function validates that the URL uses an allowed scheme (https) and points
149+ to a trusted domain, then safely encodes its path and query components.
150+
151+ Args:
152+ url (str) -- The URL to sanitize.
153+
154+ Returns:
155+ str -- The sanitized URL.
156+
157+ Raises:
158+ ValueError -- If the URL has an invalid scheme or points to an untrusted domain.
159+
160+
161+ Unit-Testing:
162+
163+ Testcase 1: Basic URL with spaces and special characters.
164+
165+ >>> url_fxtr = "https://github.com/user/Hello World!"
166+ >>> utils.sanitize_url(url_fxtr)
167+ 'https://github.com/user/Hello%20World%21'
168+ >>>
169+
146170 """
147171 parsed_url = urlparse (url )
148172 # Validate scheme
@@ -155,18 +179,27 @@ def sanitize_url(url):
155179 sanitized_path = quote (parsed_url .path )
156180 sanitized_query = quote (parsed_url .query )
157181 # Reconstruct the sanitized URL
158- sanitized_url = urlunparse ((
182+ return urlunparse ((
159183 parsed_url .scheme ,
160184 parsed_url .netloc ,
161185 sanitized_path ,
162186 parsed_url .params ,
163187 sanitized_query ,
164- parsed_url .fragment
188+ parsed_url .fragment ,
165189 ))
166- return sanitized_url
167190
168191
169- def sanitize_intersphinx_mapping (mapping ):
170- """ADD DOCS.
192+ def sanitize_intersphinx_mapping (mapping : dict ) -> dict :
193+ """
194+ Sanitize URLs in an intersphinx mapping dictionary.
195+
196+ This function applies URL sanitization to each URL in the mapping while
197+ preserving the associated extra values.
198+
199+ Args:
200+ mapping (dict) -- A dictionary mapping names to tuples of (url, extra_value).
201+
202+ Returns:
203+ dict -- A dictionary with the same structure but with sanitized URLs.
171204 """
172205 return {key : (sanitize_url (url ), extra_value ) for key , (url , extra_value ) in mapping .items ()}
0 commit comments