Skip to content

Commit 95f7e2b

Browse files
committed
Adding link checker
Signed-off-by: Aidan Reilly <[email protected]>
1 parent a915636 commit 95f7e2b

File tree

3 files changed

+135
-0
lines changed

3 files changed

+135
-0
lines changed

.github/workflows/check-links.yml

Lines changed: 43 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,43 @@
1+
# Runs scripts/check-links.sh against every AsciiDoc assembly modified
# relative to origin/main.
name: Check links in AsciiDoc

on: [push, pull_request]

jobs:
  check-links:
    name: Check links in modified files
    runs-on: ubuntu-latest

    steps:
      - uses: actions/checkout@v4
        with:
          # 0 = fetch the complete history; the three-dot diff below needs a
          # merge base between origin/main and HEAD.
          fetch-depth: 0

      - name: Install Asciidoctor
        run: |
          sudo apt-get update
          sudo apt-get install -y asciidoctor

      - name: Make script executable
        run: chmod +x scripts/check-links.sh

      - name: Fetch base branch
        run: git fetch origin main

      - name: Check links in modified files
        shell: bash
        run: |
          # --diff-filter=d excludes deleted files, so every path listed exists.
          MODIFIED_FILES=$(git diff --name-only origin/main...HEAD --diff-filter=d -- "*.adoc")
          CHECK_FILES=()

          # Keep only .adoc files located directly inside assemblies/.
          while IFS= read -r file; do
            if [[ "$file" =~ ^assemblies/[^/]+\.adoc$ ]]; then
              CHECK_FILES+=("$file")
            fi
          done <<< "$MODIFIED_FILES"

          if [ ${#CHECK_FILES[@]} -eq 0 ]; then
            echo "No modified .adoc files found in assemblies/. Skipping link check."
            exit 0
          fi

          # The step shell runs with -e, so a bare failing command would stop
          # the loop at the first file with broken links. Record the failure
          # instead, so every modified file is checked and reported.
          FAILED=0
          for f in "${CHECK_FILES[@]}"; do
            ./scripts/check-links.sh "$f" || FAILED=1
          done
          exit "$FAILED"

scripts/check-links.sh

Lines changed: 77 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,77 @@
1+
#!/bin/bash
2+
#
3+
# Checks for 404 links using Asciidoctor and curl
4+
5+
#######################################
# Prints the command-line synopsis and terminates the script.
# The AsciiDoc file argument is mandatory, so it is shown without brackets.
# Arguments: none
# Outputs:   usage line on stderr (it is an error message, not a result)
# Returns:   does not return; exits with status 1
#######################################
usage() {
  echo "Usage: $0 <adoc_file>" >&2
  exit 1
}
9+
10+
# Check dependencies
if ! asciidoctor -v >/dev/null 2>&1; then
  echo "Error: Asciidoctor is not installed" >&2
  exit 1
fi

# Parse arguments: the AsciiDoc file to check is expected as $1
if [ $# -eq 0 ]; then
  usage
fi

INPUT_FILE="$1"

# Create temp file for flagging broken links. It starts at "0"; the script
# exits non-zero later if it contains "1" after all URLs were checked.
TMP_FILE=$(mktemp) || exit 1
# Remove the flag file on every exit path; the script's exit status is kept.
trap 'rm -f -- "$TMP_FILE"' EXIT
echo "0" > "$TMP_FILE"

# Load ignore patterns from external file located next to this script
IGNORE_FILE="$(dirname "$0")/links.ignore"

if [ ! -f "$IGNORE_FILE" ]; then
  echo "Error: Missing ignore patterns file: $IGNORE_FILE" >&2
  exit 1
fi

# One pattern per line, trailing newlines stripped
mapfile -t IGNORE_PATTERNS < "$IGNORE_FILE"
# Serialised form of the array, as a string that can be replayed in another shell
PATTERNS_DECL=$(declare -p IGNORE_PATTERNS)
39+
40+
#######################################
# Checks a single URL and flags it as broken when the server answers with an
# unexpected HTTP status.
# Globals:
#   PATTERNS_DECL   (read) optional `declare -p` dump of IGNORE_PATTERNS
#   IGNORE_PATTERNS (read) regexes; a URL matching any of them is skipped
#   TMP_FILE        (read) path of the flag file; "1" is written on failure
# Arguments:
#   $1 - URL to check
# Outputs:
#   One message on stdout for each URL considered invalid
# Returns:
#   0 when the URL matches an ignore pattern; otherwise the status of the
#   final check
#######################################
check_url() {
  local url=$1
  local pattern status

  # Rebuild IGNORE_PATTERNS when only its serialised form is available.
  # PATTERNS_DECL is this script's own `declare -p` output, not external input.
  if [[ -n "${PATTERNS_DECL:-}" ]]; then
    eval "$PATTERNS_DECL"
  fi

  # Strip one trailing punctuation character from the URL.
  url=${url%[.,;:?!\]\)]}

  for pattern in "${IGNORE_PATTERNS[@]}"; do
    # A blank line is an empty regex, which matches every URL and would
    # silently disable the checker; lines starting with "#" are comments.
    if [[ -z "$pattern" || "$pattern" == \#* ]]; then
      continue
    fi
    if [[ "$url" =~ $pattern ]]; then
      # return, not exit: do not terminate the calling shell
      return 0
    fi
  done

  status=$(curl -Ls -o /dev/null -w "%{http_code}" --max-time 5 --connect-timeout 2 "$url")

  # Tolerated: 000 (curl got no HTTP response), 403, and any 2xx/3xx.
  if [[ "$status" != "000" && "$status" != "403" && ! "$status" =~ ^(2|3)[0-9]{2}$ ]]; then
    printf 'Invalid URL (HTTP status %s): \n\033[31m%s\033[0m\n' "$status" "$url"
    echo "1" > "$TMP_FILE"
  fi
}

# Make the flag file path and the function visible to child bash processes.
export TMP_FILE
export -f check_url
62+
63+
#######################################
# Renders an AsciiDoc file with Asciidoctor and runs check_url on every
# distinct http(s) URL found in the rendered output, up to 10 in parallel.
# Globals:
#   PATTERNS_DECL (read) `declare -p` dump of IGNORE_PATTERNS, replayed in
#                 each worker shell before check_url runs
#   check_url     (called) must be exported with `export -f`
# Arguments:
#   $1 - path of the AsciiDoc file
# Outputs:
#   "Checking: <file>" banner on stdout, plus whatever check_url prints
# Returns:
#   exit status of xargs
#######################################
run_url_checks() {
  local file="$1"
  # Each worker is a fresh bash process, so the array is re-declared from its
  # serialised form. The prefix is omitted when PATTERNS_DECL is empty, which
  # would otherwise produce a syntax error ("; check_url ...").
  local worker_cmd="${PATTERNS_DECL:+${PATTERNS_DECL}; }check_url \"\$1\""

  printf '\033[32mChecking: %s\033[0m\n' "$file"
  # -r (GNU extension): run nothing when no URL was found; without it the
  # worker runs once with no URL argument. "_" fills $0 so the URL is $1.
  asciidoctor "$file" -a doctype=book -o - \
    | grep -Eo '(http|https)://[a-zA-Z0-9./?=%_-]*' \
    | sort -u \
    | xargs -r -P 10 -n 1 bash -c "$worker_cmd" _
}
71+
72+
# Scan the requested file; workers write "1" into the flag file for any
# URL they consider broken.
run_url_checks "$INPUT_FILE"

# Turn the flag into the script's verdict.
broken_flag=$(cat "$TMP_FILE")
if [ "$broken_flag" -eq 1 ]; then
  echo "Errors found"
  exit 1
fi

scripts/links.ignore

Lines changed: 15 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,15 @@
1+
# Add ignore link regexes one per line
2+
.*docs\.google\.com.*
3+
.*google\.com.*
4+
.*issues\.redhat\.com.*
5+
.*0\.0\.0\.0.*
6+
.*localhost.*
7+
.*registry\.redhat\.io.*
8+
.*example\.org.*
9+
.*github\.com/example/myrepo\.git
10+
.*fonts\.googleapis\.com.*
11+
.*mixtral-my-project\.apps\.my-cluster\.com.*
12+
.*openshiftapps\.com.*
13+
.*minio-cluster\.local.*
14+
.*codeflare-operator-webhook-service\.redhat-ods-applications\.svc
15+
.*example\.com.*

0 commit comments

Comments
 (0)