Skip to content

Commit 8a3cc79

Browse files
committed
Adding link checker script + GitHub CI
Signed-off-by: Aidan Reilly <[email protected]>
1 parent a915636 commit 8a3cc79

File tree

2 files changed

+132
-0
lines changed

2 files changed

+132
-0
lines changed

.github/workflows/check-links.yml

Lines changed: 46 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,46 @@
1+
# Runs scripts/check-links.sh against the AsciiDoc assemblies of a pull request:
# one job checks every assembly, the other only the assemblies the PR touches.
name: Check Links

on:
  pull_request:
    # Only "opened" and "reopened" are listed, so pushing further commits to an
    # open PR ("synchronize") does not re-run this workflow.
    types: [opened, reopened]

jobs:
  check-links-global:
    name: Check All Links
    runs-on: ubuntu-latest
    container:
      image: asciidoctor/docker-asciidoctor
    steps:
      - uses: actions/checkout@v4

      - name: Make script executable
        run: chmod +x scripts/check-links.sh

      - name: Run link checker (global)
        run: scripts/check-links.sh --global

  check-links-modified:
    name: Check Modified Links
    runs-on: ubuntu-latest
    container:
      image: asciidoctor/docker-asciidoctor
    steps:
      - uses: actions/checkout@v4
        with:
          fetch-depth: 0 # required for git diff to work with branches

      - name: Make script executable
        run: chmod +x scripts/check-links.sh

      - name: Run link checker on modified files
        shell: bash
        env:
          # Hand the branch name to the script through the environment rather
          # than expanding the expression inside the shell source.
          BASE_REF: ${{ github.base_ref }}
        run: |
          # --diff-filter=d leaves out files the PR deletes; they no longer
          # exist in the checkout and cannot be rendered.
          MODIFIED_FILES=$(git diff --name-only --diff-filter=d "origin/${BASE_REF}...HEAD")
          CHECK_FILES=()

          # Keep only .adoc files that sit directly under assemblies/.
          while IFS= read -r file; do
            if [[ "$file" =~ ^assemblies/[^/]+\.adoc$ ]]; then
              CHECK_FILES+=("$file")
            fi
          done <<< "$MODIFIED_FILES"

          # Check every selected file before failing, so one run reports the
          # broken links of all modified assemblies.
          status=0
          for f in "${CHECK_FILES[@]}"; do
            ./scripts/check-links.sh "$f" || status=1
          done
          exit "$status"

scripts/check-links.sh

Lines changed: 86 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,86 @@
1+
#!/bin/bash
2+
#
3+
# Checks for 404 links using Asciidoctor and curl
4+
# With --global, checks every .adoc file directly under assemblies/
5+
6+
#######################################
# Prints the command-line synopsis and terminates the script.
# Arguments: none
# Outputs:   usage line on stderr (it is a diagnostic, not program output)
# Returns:   never returns; exits with status 1
#######################################
usage() {
  printf 'Usage: %s [--global] [<adoc_file>]\n' "$0" >&2
  exit 1
}
10+
11+
# Parse arguments
# "--global" as the first argument selects global mode. Otherwise exactly one
# argument is accepted and taken as the AsciiDoc file to check; any other
# argument count prints the usage line and exits.
GLOBAL_MODE=0
INPUT_FILE=""

case "$1" in
  --global)
    GLOBAL_MODE=1
    ;;
  *)
    if [[ "$#" -eq 1 ]]; then
      INPUT_FILE="$1"
    else
      usage
    fi
    ;;
esac
22+
23+
# Create temp file for flagging broken links
# The file holds "0" until check_url overwrites it with "1" on a broken link;
# a file is used because each URL is checked in a separate process.
TMP_FILE=$(mktemp) || { echo "Failed to create temp file" >&2; exit 1; }
echo "0" > "$TMP_FILE"

# Remove the flag file on every exit path. The trap body is single-quoted so
# TMP_FILE is expanded when the trap fires; the script's exit status is kept.
trap 'rm -f -- "$TMP_FILE"' EXIT
26+
27+
# Ignore patterns
# Extended regular expressions, matched with [[ =~ ]] in check_url; a URL that
# matches any entry is not requested at all.
IGNORE_PATTERNS=(
  ".*docs\.google\.com.*"
  ".*google\.com.*"
  ".*issues\.redhat\.com.*"
  ".*0\.0\.0\.0.*"
  ".*localhost.*"
  ".*registry\.redhat\.io.*"
  ".*example\.org.*"
  # The dot in "github.com" is escaped so it only matches a literal dot.
  ".*github\.com/example/myrepo\.git"
)

# Arrays cannot be exported, so keep the array's declaration as a string that
# the per-URL bash processes can execute to recreate it.
PATTERNS_DECL=$(declare -p IGNORE_PATTERNS)
40+
41+
#######################################
# Requests one URL and flags it as broken when the final response is HTTP 404.
# Globals:
#   PATTERNS_DECL   (read)    declaration string that recreates IGNORE_PATTERNS
#   IGNORE_PATTERNS (read)    regexes of URLs that must not be requested
#   TMP_FILE        (written) overwritten with "1" when a 404 is found
# Arguments:
#   $1 - URL to check
# Outputs:
#   A message naming the URL on stdout when it returns 404.
# Returns:
#   0 when the URL is ignored or checked; only 404 is treated as broken, any
#   other status (including a failed connection) is not reported.
#######################################
check_url() {
  local url=$1
  local pattern status

  # Recreate IGNORE_PATTERNS from its serialized form. PATTERNS_DECL is not
  # exported, so in the xargs child processes this is an empty no-op and the
  # array comes from the declaration prefixed to the bash -c command instead.
  eval "${PATTERNS_DECL:-}"

  # Strip trailing punctuation (one character) picked up from surrounding text
  url=${url%[.,;:?!\]\)]}

  for pattern in "${IGNORE_PATTERNS[@]}"; do
    if [[ "$url" =~ $pattern ]]; then
      # return, not exit: a direct call must not terminate the caller's shell
      return 0
    fi
  done

  # -L follows redirects; only the final status code is captured.
  status=$(curl -Ls -o /dev/null -w "%{http_code}" --max-time 5 --connect-timeout 2 "$url")

  if [[ "$status" == "404" ]]; then
    printf 'Invalid URL (HTTP status %s): \n\033[31m%s\033[0m\n' "$status" "$url"
    echo "1" > "$TMP_FILE"
  fi
}
61+
62+
# Export variables and functions for xargs subshells
# Each URL is checked in a separate bash process started by xargs; marking the
# flag-file path and the checker function for export places them in that
# process's environment.
declare -x TMP_FILE
declare -fx check_url
65+
66+
#######################################
# Renders one AsciiDoc file and checks every distinct http(s) URL in the output.
# Globals:
#   PATTERNS_DECL (read) - prefixed to each child command so the child shell
#                          has the IGNORE_PATTERNS array
#   check_url            - exported function run once per URL
# Arguments:
#   $1 - path of the AsciiDoc file to check
# Outputs:
#   A "Checking: <file>" banner, followed by whatever check_url prints.
# Returns:
#   Exit status of the final xargs stage.
#######################################
run_url_checks() {
  local adoc_file="$1"
  echo -e "\033[32mChecking: $adoc_file\033[0m"

  # -r: do not run the command at all when no URL was extracted; without it
  #     GNU xargs runs once with no argument and a bogus URL gets checked.
  # The word after the command string fills $0 of the child shell, so each
  # URL arrives as $1. Up to 10 URLs are checked in parallel.
  asciidoctor "$adoc_file" -o - \
    | grep -Eo '(http|https)://[a-zA-Z0-9./?=%_-]*' \
    | sort -u \
    | xargs -r -P 10 -n 1 bash -c "$PATTERNS_DECL; check_url \"\$1\"" check_url
}
74+
75+
# Single-file mode checks only the file named on the command line; global mode
# checks every .adoc file located directly under assemblies/.
if [[ "$GLOBAL_MODE" -ne 1 ]]; then
  run_url_checks "$INPUT_FILE"
else
  while IFS= read -r adoc_path; do
    run_url_checks "$adoc_path"
  done < <(find assemblies -maxdepth 1 -type f -name "*.adoc")
fi

# The flag file contains "1" once any checked URL returned 404; turn that into
# a failing exit status for the caller.
broken_flag=$(<"$TMP_FILE")
if [[ "$broken_flag" -eq 1 ]]; then
  echo "Errors found"
  exit 1
fi

0 commit comments

Comments
 (0)