Skip to content

Commit 93521bf

Browse files
[FEATURE] Added initial implementation of url sanitization to documentation config (- WIP #213 -)
Changes in file docs/conf.py: * integrate the URL sanitization functions Changes in file docs/utils.py: * implemented the URL sanitization functions * related work
1 parent daf1eb6 commit 93521bf

File tree

2 files changed

+54
-6
lines changed

2 files changed

+54
-6
lines changed

docs/conf.py

Lines changed: 10 additions & 6 deletions
Original file line numberDiff line numberDiff line change
@@ -46,6 +46,8 @@
4646
sys.path.insert(0, os.path.abspath(".."))
4747
from docs.utils import _validate_git_ref # noqa
4848
from docs.utils import slugify_header # noqa
49+
from docs.utils import sanitize_url # noqa
50+
from docs.utils import sanitize_intersphinx_mapping # noqa
4951

5052
# Define the branch reference for linkcode_resolve
5153
DOCS_BUILD_REF: str = _validate_git_ref(os.environ.get("DOCS_BUILD_REF", "stable"))
@@ -309,7 +311,7 @@
309311
myst_gfm_only = False
310312

311313
myst_html_meta = {
312-
"github_url": f"https://github.com/reactive-firewall/{project}"
314+
"github_url": sanitize_url(f"https://github.com/reactive-firewall/{project}")
313315
}
314316

315317
# For GH-style admonitions to MyST conversion
@@ -419,7 +421,7 @@
419421

420422
# -- Link resolver -------------------------------------------------------------
421423

422-
linkcode_url_prefix: str = f"https://github.com/reactive-firewall/{project}"
424+
linkcode_url_prefix: str = sanitize_url(f"https://github.com/reactive-firewall/{project}")
423425

424426
suffix = "/issues/%s"
425427

@@ -432,9 +434,11 @@
432434

433435
# try to link with official python3 documentation.
434436
# see https://www.sphinx-doc.org/en/master/usage/extensions/intersphinx.html for more
435-
intersphinx_mapping = {
436-
"python": ("https://docs.python.org/3", (None, "python-inv.txt"))
437-
}
437+
intersphinx_mapping = sanitize_intersphinx_mapping(
438+
{
439+
"python": ("https://docs.python.org/3", (None, "python-inv.txt")),
440+
},
441+
)
438442

439443

440444
def linkcode_resolve(domain, info):
@@ -450,4 +454,4 @@ def linkcode_resolve(domain, info):
450454
theResult = theResult.replace("/multicast.py", "/multicast/__init__.py")
451455
if "/tests.py" in theResult:
452456
theResult = theResult.replace("/tests.py", "/tests/__init__.py")
453-
return quote(theResult, safe=":/-._")
457+
return sanitize_url(quote(theResult, safe=":/-._"))

docs/utils.py

Lines changed: 44 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -17,6 +17,7 @@
1717
# limitations under the License.
1818

1919
import re
20+
from urllib.parse import urlparse, urlunparse, quote
2021

2122

2223
# Git reference validation pattern
@@ -26,6 +27,18 @@
2627
GIT_REF_PATTERN = r'^[a-zA-Z0-9][a-zA-Z0-9_\-./]*$'
2728

2829

30+
# URL allowed scheme list
# Enforces:
# - URLs must start with https
URL_ALLOWED_SCHEMES = {"https"}


# URL allowed domain list
# Enforces:
# - URLs must belong to one of these domains
# NOTE: "docs.python.org" is listed because docs/conf.py passes the Python
# intersphinx target (https://docs.python.org/3) through the URL sanitizer;
# without it the documentation config raises ValueError on import.
URL_ALLOWED_NETLOCS = {"github.com", "readthedocs.com", "docs.python.org"}
40+
41+
2942
def _validate_git_ref(ref: str) -> str:
3043
"""
3144
Validate if the provided string is a valid Git reference.
@@ -126,3 +139,34 @@ def slugify_header(s: str) -> str:
126139
text = re.sub(r'[^\w\- ]', "", s).strip().lower()
127140
# Then replace consecutive spaces or dashes with a single dash
128141
return re.sub(r'[-\s]+', "-", text)
142+
143+
144+
def sanitize_url(url: str) -> str:
    """
    Validate a URL against the allow-lists and percent-encode its path and query.

    The URL is split with `urlparse`; its scheme must be a member of
    `URL_ALLOWED_SCHEMES` and its network location must be a member of
    `URL_ALLOWED_NETLOCS`. The path and query are then percent-encoded and the
    URL is reassembled with `urlunparse`. The params and fragment components
    are passed through unchanged.

    Args:
        url (str): The URL to validate and sanitize.

    Returns:
        str: The reassembled URL with an encoded path and query.

    Raises:
        ValueError: If the scheme is not in `URL_ALLOWED_SCHEMES` or the
            network location is not in `URL_ALLOWED_NETLOCS`.
    """
    parsed_url = urlparse(url)
    # Validate scheme
    if parsed_url.scheme not in URL_ALLOWED_SCHEMES:
        raise ValueError("Invalid URL scheme. Only 'https' is allowed.")
    # Validate netloc (exact match, so ports and userinfo are rejected too)
    if parsed_url.netloc not in URL_ALLOWED_NETLOCS:
        raise ValueError(
            f"Invalid or untrusted domain. Only {URL_ALLOWED_NETLOCS} are allowed."
        )
    # Sanitize path and query.
    # "%" is kept safe so input that is already percent-encoded is not
    # double-encoded (e.g. "%20" must not become "%2520").
    sanitized_path = quote(parsed_url.path, safe="/%")
    # "=", "&" and "+" delimit/encode query fields and must survive intact,
    # otherwise "a=1&b=2" would be collapsed into a single opaque token.
    sanitized_query = quote(parsed_url.query, safe="=&+%")
    # Reconstruct the sanitized URL
    return urlunparse((
        parsed_url.scheme,
        parsed_url.netloc,
        sanitized_path,
        parsed_url.params,
        sanitized_query,
        parsed_url.fragment,
    ))
167+
168+
169+
def sanitize_intersphinx_mapping(mapping):
    """
    Return a copy of an intersphinx mapping with every target URL sanitized.

    Each value of `mapping` is unpacked as a two-item `(url, extra_value)`
    pair. The URL is passed through `sanitize_url` and the second item is
    carried over untouched.

    Args:
        mapping (dict): Maps a name to a `(url, extra_value)` pair.

    Returns:
        dict: A new dictionary with the same keys and sanitized URLs.

    Raises:
        ValueError: Propagated from `sanitize_url` when a URL is rejected.
    """
    cleaned = {}
    for name, (target, inventory) in mapping.items():
        # Only the URL is rewritten; the inventory part is kept as given.
        cleaned[name] = (sanitize_url(target), inventory)
    return cleaned

0 commit comments

Comments
 (0)