Skip to content

Commit abb49f9

Browse files
committed
Adding link checker script + GitHub CI
Signed-off-by: Aidan Reilly <[email protected]>
1 parent a915636 commit abb49f9

File tree

3 files changed

+132
-0
lines changed

3 files changed

+132
-0
lines changed

.github/workflows/check-links.yml

Lines changed: 38 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,38 @@
1+
# Runs scripts/check-links.sh against every AsciiDoc file directly under
# assemblies/ that differs from origin/main.
name: Check links in AsciiDoc

on: [push, pull_request]

jobs:
  check-links:
    name: Check links in modified files
    runs-on: ubuntu-latest

    steps:
      - uses: actions/checkout@v4
        with:
          # The default checkout is a single-commit shallow clone, which has
          # no origin/main ref or merge base for the three-dot diff below.
          fetch-depth: 0

      - name: Install Asciidoctor
        run: |
          sudo apt-get update
          sudo apt-get install -y asciidoctor

      - name: Make script executable
        run: chmod +x scripts/check-links.sh

      - name: Check links in modified files
        shell: bash
        run: |
          # --diff-filter=d leaves out deleted files, which cannot be rendered
          MODIFIED_FILES=$(git diff --name-only origin/main...HEAD --diff-filter=d -- "*.adoc")
          CHECK_FILES=()

          # Keep only .adoc files that sit directly inside assemblies/
          while IFS= read -r file; do
            [[ "$file" =~ ^assemblies/[^/]+\.adoc$ ]] && CHECK_FILES+=("$file")
          done <<< "$MODIFIED_FILES"

          if [ ${#CHECK_FILES[@]} -eq 0 ]; then
            echo "No modified .adoc files found in assemblies/. Skipping link check."
            exit 0
          fi

          # Check every file before failing so a single run reports all
          # broken links instead of stopping at the first bad file.
          FAILED=0
          for f in "${CHECK_FILES[@]}"; do
            ./scripts/check-links.sh "$f" || FAILED=1
          done
          exit "$FAILED"

scripts/check-links.sh

Lines changed: 79 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,79 @@
1+
#!/bin/bash
2+
#
3+
# Checks for 404 links using Asciidoctor and curl
4+
5+
# Print the command synopsis to stderr and abort with exit status 1.
# The file argument is mandatory, so it is shown without optional brackets.
usage() {
  echo "Usage: $0 <adoc_file>" >&2
  exit 1
}
9+
10+
# Parse arguments
INPUT_FILE=""

# Check dependencies: asciidoctor renders the document, curl probes each URL
if ! command -v asciidoctor >/dev/null 2>&1; then
  echo "Error: Asciidoctor is not installed" >&2
  exit 1
fi

if ! command -v curl >/dev/null 2>&1; then
  echo "Error: curl is not installed" >&2
  exit 1
fi

if [ $# -eq 0 ]; then
  usage
fi

INPUT_FILE="$1"

# A missing input would otherwise yield an empty URL list and a false pass
if [ ! -f "$INPUT_FILE" ]; then
  echo "Error: Input file not found: $INPUT_FILE" >&2
  exit 1
fi

# Create temp file for flagging broken links. It starts at 0 and check_url
# overwrites it with 1 when a URL is reported as invalid.
TMP_FILE=$(mktemp) || {
  echo "Error: Could not create temporary file" >&2
  exit 1
}
trap 'rm -f "$TMP_FILE"' EXIT
echo "0" > "$TMP_FILE"

# Load ignore patterns from external file located next to this script
IGNORE_FILE="$(dirname "$0")/links.ignore"

if [ ! -f "$IGNORE_FILE" ]; then
  echo "Error: Missing ignore patterns file: $IGNORE_FILE" >&2
  exit 1
fi

mapfile -t IGNORE_PATTERNS < "$IGNORE_FILE"
# Serialize the array so it can be re-created inside the child shells
# started by xargs
PATTERNS_DECL=$(declare -p IGNORE_PATTERNS)
40+
41+
#######################################
# Probe a single URL and record it as broken when the server answers with
# an unacceptable HTTP status.
# Globals:   PATTERNS_DECL (read), IGNORE_PATTERNS (read), TMP_FILE (written)
# Arguments: $1 - URL to check
# Outputs:   prints the HTTP status and the URL (in red) for a broken link
#######################################
check_url() {
  local URL=$1
  local PATTERN STATUS
  # Lines of the ignore file that are blank or start with '#' are not patterns
  local SKIP_RE='^[[:space:]]*(#|$)'

  # Re-create IGNORE_PATTERNS from its serialized `declare -p` form; the
  # value comes from this script's own ignore file, not from the document.
  eval "$PATTERNS_DECL"

  # Strip trailing punctuation
  URL=${URL%[.,;:?!\]\)]}

  for PATTERN in "${IGNORE_PATTERNS[@]}"; do
    # An empty regex matches every URL and would silently disable the check
    if [[ "$PATTERN" =~ $SKIP_RE ]]; then
      continue
    fi
    if [[ "$URL" =~ $PATTERN ]]; then
      return 0
    fi
  done

  STATUS=$(curl -Ls -o /dev/null -w "%{http_code}" --max-time 5 --connect-timeout 2 "$URL")

  # 000 (no HTTP response received) and 403 are tolerated on purpose, as are
  # all 2xx/3xx codes; anything else flags the link as broken.
  # NOTE(review): 403 is presumably accepted because some sites reject
  # automated clients — confirm.
  if [[ "$STATUS" != "000" && "$STATUS" != "403" && ! "$STATUS" =~ ^(2|3)[0-9]{2}$ ]]; then
    # printf keeps backslashes or % signs inside the URL literal
    printf 'Invalid URL (HTTP status %s): \n\033[31m%s\033[0m\n' "$STATUS" "$URL"
    echo "1" > "$TMP_FILE"
  fi
}

# Make the flag file path and the function visible to the bash children
# spawned by xargs
export TMP_FILE
export -f check_url
64+
65+
#######################################
# Render an AsciiDoc file, extract every distinct http(s) URL from the
# output and run check_url on each of them, up to 10 at a time.
# Globals:   PATTERNS_DECL (read)
# Arguments: $1 - path of the AsciiDoc file to scan
# Outputs:   a "Checking: <file>" banner, then whatever check_url prints
#######################################
run_url_checks() {
  local FILE="$1"
  echo -e "\033[32mChecking: $FILE\033[0m"
  # Inside a bracket expression a backslash is literal, so "]" has to be
  # listed first to be part of the set and whitespace is written as
  # [:space:]. "<" is excluded too so a URL used as link text does not
  # swallow the closing tag.
  # NUL-delimited input keeps xargs from interpreting quotes/backslashes in
  # URLs, and -r prevents a bogus run when no URL was found.
  asciidoctor "$FILE" -o - |
    grep -Eo 'https?://[^][:space:]"<>)]+' |
    sort -u |
    tr '\n' '\0' |
    xargs -0 -r -P 10 -n 1 bash -c "${PATTERNS_DECL:-:}"'; check_url "$1"' _
}
73+
74+
# Scan the requested document; every broken link found along the way has
# been recorded in the flag file by check_url.
run_url_checks "$INPUT_FILE"

# Turn the recorded flag into the script's exit status
BROKEN_FLAG=$(cat "$TMP_FILE")
if [ "$BROKEN_FLAG" -eq 1 ]; then
  echo "Errors found"
  exit 1
fi

scripts/links.ignore

Lines changed: 15 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,15 @@
1+
# Add ignore link regexes one per line
# Escape dots in host names so they only match a literal dot
.*docs\.google\.com.*
.*google\.com.*
.*issues\.redhat\.com.*
.*0\.0\.0\.0.*
.*localhost.*
.*registry\.redhat\.io.*
.*example\.org.*
.*github\.com/example/myrepo\.git
.*fonts\.googleapis\.com.*
.*mixtral-my-project\.apps\.my-cluster\.com.*
.*openshiftapps\.com.*
.*minio-cluster\.local.*
.*codeflare-operator-webhook-service\.redhat-ods-applications\.svc
.*example\.com.*

0 commit comments

Comments
 (0)