Skip to content

Commit 7b76cc8

Browse files
Jannis-Mittenzwei and ArBridgeman
authored and committed
add doc:links and docs:links:check
1 parent 89ad700 commit 7b76cc8

File tree

2 files changed

+107
-36
lines changed

2 files changed

+107
-36
lines changed

exasol/toolbox/nox/_documentation.py

Lines changed: 74 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -3,7 +3,18 @@
33
import shutil
44
import subprocess
55
import sys
6+
from urllib import request
7+
import urllib.error
68
import webbrowser
9+
from itertools import repeat
10+
from pathlib import Path
11+
from typing import (
12+
Container,
13+
Iterable,
14+
Optional,
15+
Tuple,
16+
)
17+
718

819
import nox
920
from nox import Session
@@ -39,6 +50,46 @@ def _build_multiversion_docs(session: nox.Session, config: Config) -> None:
3950
session.run("touch", f"{DOCS_OUTPUT_DIR}/.nojekyll")
4051

4152

53+
def _doc_files(root: Path) -> Iterable[Path]:
    """Returns an iterator over all documentation files of the project

    Recursively collects every ``*.rst`` file below ``root`` and skips files
    that live inside a virtual environment directory (``venv`` or ``.venv``).

    Args:
        root: directory to search in.

    Returns:
        A lazy iterable of paths to the ``.rst`` files found.
    """
    root = Path(root)
    # Directory names whose content is never considered project documentation.
    excluded_dirs = {"venv", ".venv"}

    def _is_project_doc(path: Path) -> bool:
        # Only inspect the parts below `root`: a checkout that itself lives
        # under a directory called "venv" must not exclude every file.
        return excluded_dirs.isdisjoint(path.relative_to(root).parts)

    return filter(_is_project_doc, root.glob("**/*.rst"))
61+
62+
63+
def _doc_urls(files: Iterable[Path]) -> Iterable[tuple[Path, str]]:
    """Returns an iterable over all urls contained in the provided files

    Each file is scanned by running the ``urlscan`` module in a subprocess;
    every non-empty output line is treated as one url.

    Args:
        files: paths of the files to scan.

    Yields:
        ``(file, url)`` pairs, skipping urls that start with ``mailto`` and
        urls listed in the internal deny list.

    Raises:
        RuntimeError: if the ``urlscan`` subprocess exits with a non-zero code.
    """
    # Urls that should never be reported; currently empty.
    _filtered: Container[str] = []

    def should_filter(url: str) -> bool:
        return url.startswith("mailto") or url in _filtered

    for file in files:
        # Use the interpreter running this code rather than whatever "python"
        # resolves to on PATH, so the module is looked up in the same environment.
        # NOTE(review): "-n" is expected to make urlscan print the urls to stdout
        # instead of opening its interactive UI - confirm against urlscan docs.
        cmd = [sys.executable, "-m", "urlscan", "-n", f"{file}"]
        result = subprocess.run(cmd, capture_output=True)
        if result.returncode != 0:
            stderr = result.stderr.decode("utf8")
            msg = f"Could not retrieve url's from file: {file}, details: {stderr}"
            raise RuntimeError(msg)
        stdout = result.stdout.decode("utf8")
        # splitlines() copes with "\r\n" as well; blank lines are dropped below.
        urls = (line.strip() for line in stdout.splitlines())
        yield from ((file, url) for url in urls if url and not should_filter(url))
80+
81+
82+
def _doc_links_check(url: str, timeout: float = 10.0) -> Tuple[Optional[int], str]:
    """Checks if an url is still working (can be accessed)

    Args:
        url: the url to request.
        timeout: seconds to wait for the server before giving up.

    Returns:
        A ``(status, details)`` tuple. ``status`` is the HTTP status code of
        the response, or ``None`` when no HTTP response was obtained at all
        (e.g. unresolvable host, refused connection, timeout, malformed url).
        ``details`` is a human readable message describing the outcome.
    """
    try:
        # User-Agent needs to be faked otherwise some webpages will deny access with a 403
        req = request.Request(url, headers={"User-Agent": "Mozilla/10.0"})
        # The context manager closes the connection once the status was read.
        with request.urlopen(req, timeout=timeout) as result:
            return result.code, f"{result.msg}"
    except urllib.error.HTTPError as ex:
        # The server answered, but with an error status.
        return ex.code, f"{ex}"
    except (OSError, ValueError) as ex:
        # OSError covers URLError and timeouts (no HTTP response available);
        # ValueError is raised for strings that are not valid urls.
        # Report these as a failed link instead of aborting the whole check.
        return None, f"{ex}"
91+
92+
4293
def _git_diff_changes_main() -> int:
4394
"""
4495
Check if doc/changes is changed and return the exit code of command git diff.
@@ -88,6 +139,29 @@ def clean_docs(_session: Session) -> None:
88139
shutil.rmtree(docs_folder)
89140

90141

142+
@nox.session(name="docs:links", python=False)
def docs_list_links(session: Session) -> None:
    """List all the links within the documentation.

    Writes one log entry per url found in the documentation files below the
    project root, together with the file the url was found in.
    """
    doc_files = _doc_files(PROJECT_CONFIG.root)
    for source_file, link in _doc_urls(doc_files):
        session.log(f"Url: {link}, File: {source_file}")
147+
148+
149+
@nox.session(name="docs:links:check", python=False)
def docs_links_check(session: Session) -> None:
    """Checks whether all links in the documentation are accessible.

    Every url found in the documentation files below the project root is
    requested once. Urls whose status is not 200 are collected and reported
    together through ``session.error``; nothing is reported when all pass.
    """
    links = _doc_urls(_doc_files(PROJECT_CONFIG.root))
    # Lazily attach the (status, details) result to each (path, url) pair.
    checked = ((path, url, *_doc_links_check(url)) for path, url in links)
    broken = [entry for entry in checked if entry[2] != 200]

    if not broken:
        return

    report = "\n".join(
        f"Url: {url}, File: {path}, Error: {details}"
        for path, url, _status, details in broken
    )
    session.error("\n" + report)
163+
164+
91165
@nox.session(name="changelog:updated", python=False)
92166
def updated(_session: Session) -> None:
93167
"""Checks if the change log has been updated"""

poetry.lock

Lines changed: 33 additions & 36 deletions
Some generated files are not rendered by default. Learn more about customizing how changed files appear on GitHub.

0 commit comments

Comments
 (0)