|
31 | 31 | # URL allowed scheme list |
32 | 32 | # Enforces: |
33 | 33 | # - URLs must start with https |
34 | | -URL_ALLOWED_SCHEMES: list = frozenset({"https"}) |
| 34 | +URL_ALLOWED_SCHEMES: frozenset = frozenset({"https"}) |
35 | 35 |
|
36 | 36 |
|
37 | 37 | # URL allowed domain list |
38 | 38 | # Enforces: |
39 | 39 | # - URLs must belong to one of these domains |
40 | | -URL_ALLOWED_NETLOCS: list = frozenset({ |
| 40 | +URL_ALLOWED_NETLOCS: frozenset = frozenset({ |
41 | 41 | "github.com", "gist.github.com", "readthedocs.com", "docs.python.org", "peps.python.org", |
42 | 42 | }) |
43 | 43 |
|
@@ -230,7 +230,7 @@ def slugify_header(s: str) -> str: |
230 | 230 | >>> slugify_header("[CEP-7] Documentation *Guide*") |
231 | 231 | 'cep-7-documentation-guide' |
232 | 232 | """ |
233 | | - # First Normalize Unicode characters |
| 233 | + # First Normalize Unicode characters to prevent homograph attacks |
234 | 234 | text: str = unicodedata.normalize('NFKC', s) # added in v2.0.9a6 |
235 | 235 | # Then, remove special characters and convert to lowercase |
236 | 236 | text = re.sub(r'[^\w\- ]', "", text).strip().lower() |
@@ -280,7 +280,7 @@ def sanitize_url(url: str) -> str: |
280 | 280 | # Validate netloc |
281 | 281 | if parsed_url.netloc not in URL_ALLOWED_NETLOCS: |
282 | 282 | raise ValueError(INVALID_DOMAIN_ERROR) |
283 | | - # Normalize netloc |
| 283 | + # Normalize netloc (homograph hardening; effectively a no-op here, since the allow-list check above only admits ASCII hosts) |
284 | 284 | sanitized_netloc: str = unicodedata.normalize('NFKC', parsed_url.netloc) # added in v2.0.9a6 |
285 | 285 | # Sanitize path and query - using the safe parameter to preserve URL structure |
286 | 286 | sanitized_path: str = quote(unicodedata.normalize('NFKC', parsed_url.path), safe="/=") |
|
0 commit comments