Skip to content

Commit b1a96bb

Browse files
committed
doc: Add test for broken links in manpage-urls.json
1 parent 55520e0 commit b1a96bb

File tree

2 files changed

+129
-0
lines changed

2 files changed

+129
-0
lines changed

doc/default.nix

Lines changed: 22 additions & 0 deletions
Original file line number | Diff line number | Diff line change
@@ -149,4 +149,26 @@ in pkgs.stdenv.mkDerivation {
149149
echo "doc manual $dest ${common.indexPath}" >> $out/nix-support/hydra-build-products
150150
echo "doc manual $dest nixpkgs-manual.epub" >> $out/nix-support/hydra-build-products
151151
'';
152+
153+
passthru.tests.manpage-urls = with pkgs; testers.invalidateFetcherByDrvHash
154+
({ name ? "manual_check-manpage-urls"
155+
, script
156+
, urlsFile
157+
}: runCommand name {
158+
nativeBuildInputs = [
159+
cacert
160+
(python3.withPackages (p: with p; [
161+
aiohttp
162+
rich
163+
structlog
164+
]))
165+
];
166+
outputHash = "sha256-47DEQpj8HBSa+/TImW+5JCeuQeRkm5NMpJWZG3hSuFU="; # Empty output
167+
} ''
168+
python3 ${script} ${urlsFile}
169+
touch $out
170+
'') {
171+
script = ./tests/manpage-urls.py;
172+
urlsFile = ./manpage-urls.json;
173+
};
152174
}

doc/tests/manpage-urls.py

Lines changed: 107 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,107 @@
1+
#! /usr/bin/env nix-shell
2+
#! nix-shell -i "python3 -I" -p "python3.withPackages(p: with p; [ aiohttp rich structlog ])"
3+
4+
from argparse import ArgumentParser
5+
from collections import defaultdict
6+
from enum import IntEnum
7+
from http import HTTPStatus
8+
from pathlib import Path
9+
import asyncio, json, logging
10+
11+
import aiohttp, structlog
12+
from structlog.contextvars import bound_contextvars as log_context
13+
14+
15+
# Log levels selectable on the command line. Each member carries the numeric
# value of the same-named constant in the standard ``logging`` module.
LogLevel = IntEnum(
    'LogLevel',
    {
        lvl: getattr(logging, lvl)
        for lvl in ('DEBUG', 'INFO', 'WARNING', 'ERROR', 'CRITICAL')
    },
)
# Render members by bare name (e.g. "INFO") instead of the enum default.
LogLevel.__str__ = lambda self: self.name


# Status codes that the summary tallies explicitly (as correct or broken);
# every other status is counted as "unexpected".
EXPECTED_STATUS = frozenset((
    HTTPStatus.OK, HTTPStatus.FOUND,
    HTTPStatus.NOT_FOUND,
))
26+
27+
async def check(session, manpage: str, url: str) -> HTTPStatus:
28+
with log_context(manpage=manpage, url=url):
29+
logger.debug("Checking")
30+
async with session.head(url) as resp:
31+
st = HTTPStatus(resp.status)
32+
match st:
33+
case HTTPStatus.OK | HTTPStatus.FOUND:
34+
logger.debug("OK!")
35+
case HTTPStatus.NOT_FOUND:
36+
logger.error("Broken link!")
37+
case _ if st < 400:
38+
logger.info("Unexpected code", status=st)
39+
case _ if 400 <= st < 600:
40+
logger.warn("Unexpected error", status=st)
41+
42+
return st
43+
44+
async def main(urls_path):
    """Check every URL listed in the JSON file at *urls_path*.

    The file is parsed as a JSON object whose items are passed to ``check``
    as ``(manpage, url)`` pairs. All checks are awaited as they complete and
    tallied by returned status, and a one-line summary is logged.

    Args:
        urls_path: Object with an ``open()`` method returning a readable file
            (the caller passes a ``pathlib.Path``).

    Returns:
        A ``defaultdict`` mapping each observed status to the number of URLs
        that produced it. The OK, FOUND and NOT_FOUND keys are always present
        because the summary looks them up.
    """
    logger.info(f"Parsing {urls_path}")
    with urls_path.open() as handle:
        manpage_urls = json.load(handle)

    tally = defaultdict(int)

    logger.info(f"Checking URLs from {urls_path}")
    async with aiohttp.ClientSession() as session:
        pending = [
            check(session, name, link)
            for name, link in manpage_urls.items()
        ]
        for finished in asyncio.as_completed(pending):
            tally[await finished] += 1

    # Indexing (rather than .get) deliberately materialises these keys.
    ok = tally[HTTPStatus.OK] + tally[HTTPStatus.FOUND]
    broken = tally[HTTPStatus.NOT_FOUND]
    unknown = sum(
        seen for code, seen in tally.items() if code not in EXPECTED_STATUS
    )
    logger.info(f"Done: {broken} broken links, "
                f"{ok} correct links, and {unknown} unexpected status")

    return tally
66+
67+
68+
def parse_args(args=None):
    """Parse the command-line arguments of the manpage URL checker.

    Args:
        args: Argument strings to parse; ``None`` lets argparse fall back to
            the process arguments.

    Returns:
        The argparse namespace with ``log_level`` (a ``LogLevel`` member,
        defaulting to the ``LOG_LEVEL`` environment variable or ``INFO``) and
        ``file`` (a ``Path``, or ``None`` when the positional is omitted).
    """
    # Imported here so the function does not depend on names that are only
    # bound when the module is executed as a script.
    import os
    from argparse import ArgumentTypeError

    def log_level(name):
        # Turn an unknown level name into a regular argparse usage error
        # instead of letting the KeyError escape as a traceback.
        try:
            return LogLevel[name]
        except KeyError:
            raise ArgumentTypeError(f"unknown log level {name!r}") from None

    parser = ArgumentParser(
        prog='check-manpage-urls',
        description='Check the validity of the manpage URLs linked in the nixpkgs manual',
    )
    parser.add_argument(
        '-l', '--log-level',
        default=os.getenv('LOG_LEVEL', 'INFO'),
        type=log_level,
        choices=list(LogLevel),
    )
    parser.add_argument(
        'file',
        type=Path,
        nargs='?',
    )

    return parser.parse_args(args)
86+
87+
88+
if __name__ == "__main__":
    # ``os`` is kept as a module-level binding because ``parse_args`` reads
    # ``os.getenv``.
    import os
    import sys

    args = parse_args()

    # Drop log records below the level selected on the command line.
    structlog.configure(
        wrapper_class=structlog.make_filtering_bound_logger(args.log_level),
    )
    logger = structlog.getLogger("check-manpage-urls.py")

    urls_path = args.file
    if urls_path is None:
        # This script sits at <repo>/doc/tests/, so the repository root is the
        # third ancestor of the resolved file path (parents[0] is doc/tests,
        # parents[1] is doc, parents[2] is the root).
        REPO_ROOT = Path(__file__).resolve().parents[2]
        logger.info(f"Assuming we are in a nixpkgs repo rooted at {REPO_ROOT}")

        urls_path = REPO_ROOT / 'doc' / 'manpage-urls.json'

    count = asyncio.run(main(urls_path))

    # Only links answering 404 make the run fail.
    sys.exit(0 if count[HTTPStatus.NOT_FOUND] == 0 else 1)

0 commit comments

Comments
 (0)