Skip to content

Commit 9f416a8

Browse files
[STYLE] Improve type-hints and inline-comments as discussed in review (- WIP PR #427 -)
Changes in file docs/utils.py: * improved inline comment to mention "normalize" "to prevent homograph attacks" * corrected misleading type hints/annotations to use the better 'frozenset' type for whitelisting
1 parent 192d50e commit 9f416a8

File tree

1 file changed

+4
-4
lines changed

1 file changed

+4
-4
lines changed

docs/utils.py

Lines changed: 4 additions & 4 deletions
Original file line numberDiff line numberDiff line change
@@ -31,13 +31,13 @@
3131
# URL allowed scheme list
3232
# Enforces:
3333
# - URLs Must start with https
34-
URL_ALLOWED_SCHEMES: list = frozenset({"https"})
34+
URL_ALLOWED_SCHEMES: frozenset = frozenset({"https"})
3535

3636

3737
# URL allowed domain list
3838
# Enforces:
3939
# - URLs Must belong to one of these domains
40-
URL_ALLOWED_NETLOCS: list = frozenset({
40+
URL_ALLOWED_NETLOCS: frozenset = frozenset({
4141
"github.com", "gist.github.com", "readthedocs.com", "docs.python.org", "peps.python.org",
4242
})
4343

@@ -230,7 +230,7 @@ def slugify_header(s: str) -> str:
230230
>>> slugify_header("[CEP-7] Documentation *Guide*")
231231
'cep-7-documentation-guide'
232232
"""
233-
# First Normalize Unicode characters
233+
# First Normalize Unicode characters to prevent homograph attacks
234234
text: str = unicodedata.normalize('NFKC', s) # added in v2.0.9a6
235235
# Then, remove special characters and convert to lowercase
236236
text = re.sub(r'[^\w\- ]', "", text).strip().lower()
@@ -280,7 +280,7 @@ def sanitize_url(url: str) -> str:
280280
# Validate netloc
281281
if parsed_url.netloc not in URL_ALLOWED_NETLOCS:
282282
raise ValueError(INVALID_DOMAIN_ERROR)
283-
# Normalize netloc
283+
# Normalize netloc to prevent homograph attacks
284284
sanitized_netloc: str = unicodedata.normalize('NFKC', parsed_url.netloc) # added in v2.0.9a6
285285
# Sanitize path and query - using the safe parameter to preserve URL structure
286286
sanitized_path: str = quote(unicodedata.normalize('NFKC', parsed_url.path), safe="/=")

0 commit comments

Comments
 (0)