changed docs:links:check

Jannis-Mittenzwei · Jannis-Mittenzwei · commit fd521f50da1c · 2025-06-04T17:04:42.000+02:00
diff --git a/.github/workflows/checks.yml b/.github/workflows/checks.yml
@@ -39,6 +39,22 @@ jobs:
         run: |
           poetry run -- nox -s docs:build
 
+  Documentation-Links:
+    name: Doc Links Check
+    runs-on: ubuntu-24.04
+    permissions:
+      contents: read
+    steps:
+      - name: SCM Checkout
+        uses: actions/checkout@v4
+
+      - name: Setup Python & Poetry Environment
+        uses: ./.github/actions/python-environment
+
+      - name: Link Check
+        run: |
+          poetry run -- nox -s docs:links:check
+
   Changelog:
     name: Changelog Update Check
     runs-on: ubuntu-24.04
diff --git a/doc/changes/unreleased.md b/doc/changes/unreleased.md
@@ -25,4 +25,8 @@ permissions to be increased for specific jobs.
 ## Security
 
 * [#420](https://github.com/exasol/python-toolbox/issues/420): Replaced 3rd party action with GitHub actions for gh-pages
-* [#422](https://github.com/exasol/python-toolbox/issues/422): Set permissions within the GitHub workflows to restrict usage of the default GitHub token
+* [#422](https://github.com/exasol/python-toolbox/issues/422): Set permissions within the GitHub workflows to restrict usage of the default GitHub token
+
+## ✨ Features
+
+* [#409](https://github.com/exasol/python-toolbox/issues/409): Added a CI job that checks the links in the documentation
diff --git a/doc/conf.py b/doc/conf.py
@@ -79,7 +79,4 @@
     "accent_color": "grass",
 }
 # -- Configure link checking behavior  ----------------------------------------
-extra_linkcheck_ignores = os.getenv("SPHINX_EXTRA_LINKCHECK_IGNORES")
-linkcheck_ignore = (
-    [] if not extra_linkcheck_ignores else extra_linkcheck_ignores.split(",")
-)
+linkcheck_rate_limit_timeout = 15
diff --git a/doc/github_actions/security_issues.rst b/doc/github_actions/security_issues.rst
@@ -112,4 +112,4 @@ Ideas
 .. todo::
 
     Consider adapting common CVE report format as input, for additional details
-    `see here <https://github.com/CVEProject/cve-schema/blob/master/schema/v5.0/CVE_JSON_5.0_schema.json>`_.
+    `see here <https://github.com/CVEProject/cve-schema/blob/main/schema/CVE_Record_Format.json>`_.
diff --git a/exasol/toolbox/nox/_documentation.py b/exasol/toolbox/nox/_documentation.py
@@ -16,14 +16,11 @@
     Optional,
     Tuple,
 )
+import argparse
 
 import nox
+import requests   # type: ignore
 from nox import Session
-from requests import (
-    get,
-    head,
-)
-from requests.exceptions import Timeout
 
 from exasol.toolbox.nox._shared import DOCS_OUTPUT_DIR
 from noxconfig import (
@@ -34,8 +31,6 @@
 
 def _build_docs(session: nox.Session, config: Config) -> None:
     session.run(
-        "poetry",
-        "run",
         "sphinx-build",
         "-W",
         "-b",
@@ -47,15 +42,34 @@ def _build_docs(session: nox.Session, config: Config) -> None:
 
 def _build_multiversion_docs(session: nox.Session, config: Config) -> None:
     session.run(
-        "poetry",
-        "run",
         "sphinx-multiversion",
         f"{config.doc}",
         DOCS_OUTPUT_DIR,
     )
     session.run("touch", f"{DOCS_OUTPUT_DIR}/.nojekyll")
 
 
+def _check_failed_links(results: list[str]):
+    """
+    Re-check links that the Sphinx linkcheck run did not report as working.
+
+    Every line of ``results`` that looks like a JSON object is parsed. Entries
+    whose status is neither ``working`` nor ``ignored`` are requested again
+    with an HTTP HEAD request; if the server answers with status code 200 the
+    entry is marked as ``working``. The re-checked entry is written back to
+    ``results``.
+
+    Args:
+        results: Lines of the linkcheck ``output.json`` file. The list is
+            updated in place.
+
+    Returns:
+        A tuple ``(results, errors)``: the (possibly updated) input list and
+        the JSON lines of all entries that are still ``broken`` or
+        ``timeout`` after the re-check.
+    """
+    errors = []
+    for line, result in enumerate(results):
+        # Skip blank lines and anything else that is not a JSON object.
+        if not (result.startswith("{") and "}" in result):
+            continue
+        data = json.loads(result)
+        # Links that already work or are deliberately ignored need no re-check.
+        # (The former ``not (a) or (b)`` condition re-requested ignored links.)
+        if data["status"] in ("working", "ignored"):
+            continue
+        match = re.search(r"https?://[^\s\"\'<>]+", data["uri"])
+        if match:
+            try:
+                response = requests.head(match.group(), timeout=15)
+            except requests.exceptions.RequestException:
+                # Timeout, DNS failure, refused connection, ...: the link is
+                # still unreachable, so keep the status reported by Sphinx
+                # instead of aborting the whole check with a traceback.
+                pass
+            else:
+                if response.status_code == 200:
+                    data["status"] = "working"
+                    data["code"] = response.status_code
+                results[line] = json.dumps(data)
+        if data["status"] in ("broken", "timeout"):
+            errors.append(result)
+    return results, errors
+
+
 def _git_diff_changes_main() -> int:
     """
     Check if doc/changes is changed and return the exit code of command git diff.
@@ -108,25 +122,18 @@ def clean_docs(_session: Session) -> None:
 @nox.session(name="docs:links", python=False)
 def docs_list_links(session: Session) -> None:
     """List all the links within the documentation."""
-    ignore = [r".*"]
-    env = os.environ.copy()
-    env["SPHINX_EXTRA_LINKCHECK_IGNORES"] = ",".join(ignore)
     with tempfile.TemporaryDirectory() as path:
         tmpdir = Path(path)
         sp = subprocess.run(
             [
-                "poetry",
-                "run",
-                "--",
                 "sphinx-build",
                 "-b",
                 "linkcheck",
+                "-D",
+                "linkcheck_ignore=.*",
                 PROJECT_CONFIG.root / "doc",
                 tmpdir,
             ],
-            capture_output=True,
-            text=True,
-            env=env,
         )
         print(sp.returncode)
         if sp.returncode >= 2:
@@ -151,65 +158,43 @@ def docs_list_links(session: Session) -> None:
 @nox.session(name="docs:links:check", python=False)
 def docs_links_check(session: Session) -> None:
     """Checks whether all links in the documentation are accessible."""
-    ignore = [r"https?://"]
-    env = os.environ.copy()
-    env["SPHINX_EXTRA_LINKCHECK_IGNORES"] = ",".join(ignore)
+    parser = argparse.ArgumentParser(
+        prog="nox -s release:prepare",
+        usage="nox -s release:prepare -- [-h] [-o |--output]",
+        formatter_class=argparse.ArgumentDefaultsHelpFormatter,
+    )
+    parser.add_argument(
+        "-o",
+        "--output",
+        type=Path,
+        help="path to output file",
+        default="",
+    )
+    args = parser.parse_args(session.posargs)
     with tempfile.TemporaryDirectory() as path:
         tmpdir = Path(path)
         sp = subprocess.run(
             [
-                "poetry",
-                "run",
-                "--",
                 "sphinx-build",
                 "-b",
                 "linkcheck",
                 PROJECT_CONFIG.root / "doc",
                 tmpdir,
             ],
-            capture_output=True,
-            text=True,
-            env=env,
         )
-        print(sp.returncode)
         if sp.returncode >= 2:
             print(sp.stderr)
             session.error(2)
         output = tmpdir / "output.json"
-        results = output.read_text().split("\n")
-        reslen = len(results)
-        resstr = results[-1]
-        if (reslen == 0) or ((reslen == 1) and (resstr == "")):
-            return
-        elif resstr == "":
-            results.pop()
-        for line_nr, result in enumerate(results):
-            resdict = json.loads(result)
-            if resdict["status"] == "ignored" and resdict["uri"].startswith("http"):
-                try:
-                    match = re.search(r"https?://[^\s\"\'<>]+", resdict["uri"])
-                    if match:
-                        resdict["uri"] = match.group()
-                    print(f"{line_nr}/{reslen}")
-                    request = head(resdict["uri"], timeout=5)
-                    if request.status_code != 200:
-                        request = get(resdict["uri"], timeout=5, stream=True)
-                        request.close()
-                    if request.status_code >= 400:
-                        resdict["status"] = "broken"
-                        resdict["code"] = request.status_code
-                    if request.status_code < 400:
-                        resdict["status"] = "working"
-                        resdict["code"] = request.status_code
-                except Timeout:
-                    resdict["status"] = "timeout"
-                results[line_nr] = json.dumps(resdict)
-        output.write_text("\n".join(f"{r}" for r in results))
-        errors = []
-        for result in results:
-            data = json.loads(result)
-            if (data["status"] == "broken") or data["status"] == "timeout":
-                errors.append(result)
+        out = output.read_text().split("\n")
+        results, errors = _check_failed_links(out)
+        if hasattr(args, "output"):
+            outputfile = Path(args.output) / "link-check-output.json"
+            if not outputfile.exists():
+                outputfile.parent.mkdir(parents=True, exist_ok=True)
+                outputfile.touch()
+            outputfile.write_text("\n".join(result for result in results))
+            print(f"file generated at path: {outputfile.resolve()}")
         if errors:
             print("Error" + "s" if len(errors) > 1 else "")
             print("\n".join(error for error in errors))
diff --git a/poetry.lock b/poetry.lock
diff --git a/project-template/{{cookiecutter.repo_name}}/doc/conf.py b/project-template/{{cookiecutter.repo_name}}/doc/conf.py
@@ -76,3 +76,5 @@
     "github_url": "https://github.com/exasol/{{cookiecutter.repo_name}}",
     "accent_color": "grass",
 }
+# -- Configure link checking behavior  ----------------------------------------
+linkcheck_rate_limit_timeout = 15
diff --git a/pyproject.toml b/pyproject.toml
@@ -64,7 +64,6 @@ bandit = {extras = ["toml"], version = "^1.7.9"}
 jinja2 = "^3.1.6"
 pip-licenses = "^5.0.0"
 pip-audit = "^2.7.3"
-urlscan = "^1.0.6"
 
 [tool.poetry.group.dev.dependencies]
 autoimport = "^1.4.0"

Original file line number	Diff line number	Diff line change
`@@ -79,7 +79,4 @@`
`79`	`79`	`"accent_color": "grass",`
`80`	`80`	`}`
`81`	`81`	`# -- Configure link checking behavior ----------------------------------------`
`82`		`-extra_linkcheck_ignores = os.getenv("SPHINX_EXTRA_LINKCHECK_IGNORES")`
`83`		`-linkcheck_ignore = (`
`84`		`- [] if not extra_linkcheck_ignores else extra_linkcheck_ignores.split(",")`
`85`		`-)`
	`82`	`+linkcheck_rate_limit_timeout = 15`
Original file line number	Diff line number	Diff line change
`@@ -76,3 +76,5 @@`
`76`	`76`	`"github_url": "https://github.com/exasol/{{cookiecutter.repo_name}}",`
`77`	`77`	`"accent_color": "grass",`
`78`	`78`	`}`
	`79`	`+# -- Configure link checking behavior ----------------------------------------`
	`80`	`+linkcheck_rate_limit_timeout = 15`