|
17 | 17 | # limitations under the License. |
18 | 18 |
|
19 | 19 | import re |
| 20 | +from urllib.parse import urlparse, urlunparse, quote |
20 | 21 |
|
21 | 22 |
|
22 | 23 | # Git reference validation pattern |
|
26 | 27 | GIT_REF_PATTERN = r'^[a-zA-Z0-9][a-zA-Z0-9_\-./]*$' |
27 | 28 |
|
28 | 29 |
|
| 30 | +# URL allowed scheme list |
| 31 | +# Enforces: |
| 32 | +# - URLs must start with https
| 33 | +URL_ALLOWED_SCHEMES = {"https"} |
| 34 | + |
| 35 | + |
| 36 | +# URL allowed domain list |
| 37 | +# Enforces: |
| 38 | +# - URLs must belong to one of these domains
| 39 | +URL_ALLOWED_NETLOCS = {"github.com", "readthedocs.com"} |
| 40 | + |
| 41 | + |
29 | 42 | def _validate_git_ref(ref: str) -> str: |
30 | 43 | """ |
31 | 44 | Validate if the provided string is a valid Git reference. |
@@ -126,3 +139,34 @@ def slugify_header(s: str) -> str: |
126 | 139 | text = re.sub(r'[^\w\- ]', "", s).strip().lower() |
127 | 140 | # Then replace consecutive spaces or dashes with a single dash |
128 | 141 | return re.sub(r'[-\s]+', "-", text) |
| 142 | + |
| 143 | + |
def _quote_preserving_escapes(component, safe):
    """Percent-encode *component* without re-encoding existing ``%XX`` escapes."""
    # Keeping "%" safe stops quote() from turning "%20" into "%2520".
    quoted = quote(component, safe=safe + "%")
    # A "%" that does not start a valid two-hex-digit escape is literal data
    # and still has to be encoded.
    return re.sub(r"%(?![0-9A-Fa-f]{2})", "%25", quoted)


def sanitize_url(url):
    """Validate *url* against the allow-lists and return it percent-encoded.

    The URL is split with ``urlparse``. Its scheme must be a member of
    ``URL_ALLOWED_SCHEMES`` and its network location (compared verbatim, so
    any port or user-info makes it differ) must be a member of
    ``URL_ALLOWED_NETLOCS``. The path, params, query and fragment are then
    percent-encoded and the URL is reassembled with ``urlunparse``.

    Encoding is idempotent: existing ``%XX`` escapes are kept as they are,
    and the ``=``, ``&`` and ``+`` delimiters of a query string are left
    intact so ``key=value`` pairs survive.

    Args:
        url: The URL string to validate and sanitize.

    Returns:
        The reassembled URL with its path, params, query and fragment
        percent-encoded.

    Raises:
        ValueError: If the scheme or the network location is not allowed.
    """
    parsed_url = urlparse(url)
    # Validate scheme
    if parsed_url.scheme not in URL_ALLOWED_SCHEMES:
        raise ValueError("Invalid URL scheme. Only 'https' is allowed.")
    # Validate netloc
    if parsed_url.netloc not in URL_ALLOWED_NETLOCS:
        raise ValueError(f"Invalid or untrusted domain. Only {URL_ALLOWED_NETLOCS} are allowed.")
    # Each component keeps only the delimiters that carry meaning there;
    # everything else outside the unreserved set is percent-encoded.
    return urlunparse((
        parsed_url.scheme,
        parsed_url.netloc,
        _quote_preserving_escapes(parsed_url.path, "/"),
        _quote_preserving_escapes(parsed_url.params, "/="),
        _quote_preserving_escapes(parsed_url.query, "/=&+"),
        _quote_preserving_escapes(parsed_url.fragment, "/?=&+"),
    ))
| 167 | + |
| 168 | + |
def sanitize_intersphinx_mapping(mapping):
    """Return a copy of *mapping* with every URL passed through ``sanitize_url``.

    Each value of *mapping* is unpacked as a two-item ``(url, extra_value)``
    pair. The URL is sanitized and the second item is carried over unchanged.
    Presumably this is a Sphinx ``intersphinx_mapping`` dict; confirm against
    the caller.

    Args:
        mapping: Dict whose values are ``(url, extra_value)`` pairs.

    Returns:
        A new dict with the same keys and ``(sanitized_url, extra_value)``
        tuples as values.

    Raises:
        ValueError: Propagated from ``sanitize_url`` when a URL is rejected.
    """
    sanitized = {}
    for name, (target, inventory) in mapping.items():
        sanitized[name] = (sanitize_url(target), inventory)
    return sanitized
0 commit comments