 import shutil
 import subprocess
 import sys
+import urllib.error
+import urllib.request
 import webbrowser
+from itertools import repeat
+from pathlib import Path
+from typing import (
+    Container,
+    Iterable,
+    Optional,
+    Tuple,
+)
+
 
 import nox
 from nox import Session
@@ -39,6 +50,52 @@ def _build_multiversion_docs(session: nox.Session, config: Config) -> None:
     session.run("touch", f"{DOCS_OUTPUT_DIR}/.nojekyll")
 
 
+def _doc_files(root: Path) -> Iterable[Path]:
+    """Returns an iterator over all documentation files of the project"""
+    docs = Path(root).glob("**/*.rst")
+
+    def _deny_filter(path: Path) -> bool:
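+        # Exclude documentation files that live inside a virtual environment.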
+        return "venv" not in path.parts
+
+    return filter(_deny_filter, docs)
+
+
+def _doc_urls(files: Iterable[Path]) -> Iterable[Tuple[Path, str]]:
+    """Yields (file, url) pairs for all URLs contained in the provided files"""
+    def should_filter(url: str) -> bool:
+        _filtered: Container[str] = []  # known URLs to exclude from checking
+        return url.startswith("mailto") or url in _filtered
+
+    for file in files:
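+        # urlscan's -n/--no-browser flag pipes the URLs found in the file to
+        # stdout, one per line, instead of opening its interactive UI.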
+        cmd = [sys.executable, "-m", "urlscan", "-n", str(file)]
+        result = subprocess.run(cmd, capture_output=True)
+        if result.returncode != 0:
+            stderr = result.stderr.decode("utf8")
+            msg = f"Could not retrieve URLs from file: {file}, details: {stderr}"
+            raise RuntimeError(msg)
+        stdout = result.stdout.decode("utf8").strip()
+        _urls = (url.strip() for url in stdout.split("\n"))
+        _urls = (url for url in _urls if url)  # drop empty lines
+        yield from zip(repeat(file), filter(lambda url: not should_filter(url), _urls))
+
+
+def _doc_links_check(url: str) -> Tuple[Optional[int], str]:
+    """Checks if a URL is still working (can be accessed)"""
+    try:
+        # User-Agent needs to be faked, otherwise some webpages will deny access with a 403
+        req = urllib.request.Request(url, headers={"User-Agent": "Mozilla/10.0"})
+        result = urllib.request.urlopen(req)
+        return result.code, f"{result.msg}"
+    except urllib.error.HTTPError as ex:
+        return ex.code, f"{ex}"
+    except urllib.error.URLError as ex:
+        # No HTTP status is available for connection-level failures (DNS, timeouts, ...)
+        return None, f"{ex}"
+
+
 def _git_diff_changes_main() -> int:
     """
     Check if doc/changes is changed and return the exit code of command git diff.
@@ -88,6 +145,31 @@ def clean_docs(_session: Session) -> None:
     shutil.rmtree(docs_folder)
 
 
+@nox.session(name="docs:links", python=False)
+def docs_list_links(session: Session) -> None:
+    """List all the links within the documentation."""
+    for path, url in _doc_urls(_doc_files(PROJECT_CONFIG.root)):
+        session.log(f"Url: {url}, File: {path}")
+
+
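+# Sessions are invoked by name, e.g. `nox -s docs:links` or `nox -s docs:links:check`.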
+@nox.session(name="docs:links:check", python=False)
+def docs_links_check(session: Session) -> None:
+    """Checks whether all links in the documentation are accessible."""
+    errors = []
+    for path, url in _doc_urls(_doc_files(PROJECT_CONFIG.root)):
+        status, details = _doc_links_check(url)
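+        # urlopen follows redirects, so any non-200 final status is treated as broken.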
+        if status != 200:
+            errors.append((path, url, status, details))
+
+    if errors:
+        session.error(
+            "\n"
+            + "\n".join(f"Url: {e[1]}, File: {e[0]}, Error: {e[3]}" for e in errors)
+        )
+
+
 @nox.session(name="changelog:updated", python=False)
 def updated(_session: Session) -> None:
     """Checks if the change log has been updated"""