Skip to content

Commit 007019d

Browse files
committed
Adding link checker
1 parent a915636 commit 007019d

File tree

4 files changed

+184
-0
lines changed

4 files changed

+184
-0
lines changed

.github/workflows/check-links.yml

Lines changed: 33 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,33 @@
1+
# Runs the AsciiDoc link checker on every push and pull request.
name: Check links in AsciiDoc

on:
  - push
  - pull_request

jobs:
  check-links:
    name: Check links in modified files
    runs-on: ubuntu-latest

    steps:
      # Full history is fetched because scripts/check-modified.sh diffs
      # origin/main...HEAD to find the changed .adoc files.
      - uses: actions/checkout@v4
        with:
          fetch-depth: 0

      - name: Install Asciidoctor
        run: |
          sudo apt-get update
          sudo apt-get install -y asciidoctor

      - name: Make scripts executable
        run: chmod +x scripts/check-links.sh scripts/check-modified.sh

      - name: Fetch base branch
        run: git fetch origin main

      - name: Check links in modified files
        shell: bash
        run: scripts/check-modified.sh
32+
33+

scripts/check-links.sh

Lines changed: 77 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,77 @@
1+
#!/bin/bash
2+
#
3+
# Checks for 404 links using Asciidoctor and curl
4+
5+
#######################################
# Print a one-line usage summary and abort.
# Arguments: none
# Outputs:   usage text on stderr (it is a diagnostic, not program output)
# Returns:   never returns; exits the shell with status 1
#######################################
usage() {
  # The file argument is mandatory, so it is shown without optional brackets.
  echo "Usage: $0 <adoc_file>" >&2
  exit 1
}
9+
10+
# Parse arguments
11+
# Parse arguments: the AsciiDoc file to check is mandatory.
if [ $# -eq 0 ]; then
  usage
fi

INPUT_FILE="$1"

# Fail loudly on a missing file; otherwise asciidoctor would produce no URLs
# and the run would be reported as successful.
if [ ! -f "$INPUT_FILE" ]; then
  echo "Error: File not found: $INPUT_FILE" >&2
  exit 1
fi

# Check dependencies
if ! asciidoctor -v >/dev/null 2>&1; then
  echo "Error: Asciidoctor is not installed" >&2
  exit 1
fi

# Create temp file for flagging broken links. It starts at "0"; check_url
# overwrites it with "1" when a URL is reported as invalid.
TMP_FILE=$(mktemp) || {
  echo "Error: Could not create temporary file" >&2
  exit 1
}
# Remove the flag file on every exit path so runs do not litter the temp dir.
trap 'rm -f -- "$TMP_FILE"' EXIT
echo "0" > "$TMP_FILE"

# Load ignore patterns from external file (one regex per line, next to this
# script).
IGNORE_FILE="$(dirname "$0")/links.ignore"

if [ ! -f "$IGNORE_FILE" ]; then
  echo "Error: Missing ignore patterns file: $IGNORE_FILE" >&2
  exit 1
fi

mapfile -t IGNORE_PATTERNS < "$IGNORE_FILE"
# Serialised array definition; it is prepended to the command line of each
# child shell because arrays cannot be exported through the environment.
PATTERNS_DECL=$(declare -p IGNORE_PATTERNS)
39+
40+
#######################################
# Probe a single URL with curl and flag it when it looks broken.
# Globals:
#   IGNORE_PATTERNS (read) - array of regexes; matching URLs are not probed
#   TMP_FILE        (read) - path of the flag file; "1" is written to it
#                            when the URL is reported as invalid
# Arguments:
#   $1 - URL to check (one trailing punctuation character is stripped)
# Outputs:
#   For an invalid URL, a message with the HTTP status and the URL on stdout
# Returns:
#   0 when the URL is ignored; otherwise the status of the final check/write
#######################################
check_url() {
  local URL=$1
  local PATTERN STATUS

  # Drop one trailing punctuation mark that belongs to the surrounding text.
  URL=${URL%[.,;:?!\]\)]}

  for PATTERN in "${IGNORE_PATTERNS[@]}"; do
    # Blank lines and "#" comment lines of the ignore file are not patterns.
    # An empty regex matches every string, so without this guard a single
    # blank line would silently disable the whole link check.
    if [[ -z "$PATTERN" || "$PATTERN" == \#* ]]; then
      continue
    fi
    if [[ "$URL" =~ $PATTERN ]]; then
      return 0
    fi
  done

  STATUS=$(curl -Ls -o /dev/null -w "%{http_code}" --max-time 5 --connect-timeout 2 "$URL")

  # Statuses 000 and 403 and every 2xx/3xx code are tolerated; anything else
  # is reported and recorded in the flag file.
  if [[ "$STATUS" != "000" && "$STATUS" != "403" && ! "$STATUS" =~ ^(2|3)[0-9]{2}$ ]]; then
    # printf keeps backslashes inside the URL literal (echo -e would not).
    printf 'Invalid URL (HTTP status %s): \n\033[31m%s\033[0m\n' "$STATUS" "$URL"
    echo "1" > "$TMP_FILE"
  fi
}
59+
60+
# The child shells spawned by xargs need the flag-file path and the checker
# function, so both are placed in the exported environment.
declare -x TMP_FILE
declare -fx check_url
62+
63+
#######################################
# Render an AsciiDoc file and check every distinct http(s) URL found in it.
# Globals:
#   PATTERNS_DECL (read) - "declare -a IGNORE_PATTERNS=..." text that is
#                          replayed in each child shell
#   check_url            - exported function invoked once per URL
# Arguments:
#   $1 - path of the AsciiDoc file to render
# Outputs:
#   A "Checking: <file>" banner on stdout, plus whatever check_url prints
# Returns:
#   Exit status of the xargs stage (last command of the pipeline)
#######################################
run_url_checks() {
  local FILE="$1"
  printf '\033[32mChecking: %s\033[0m\n' "$FILE"
  # -r: do not start a child shell at all when no URL was found (GNU xargs
  #     would otherwise run the command once with no argument).
  # "_" fills $0 of the child shell so the URL arrives as a regular "$1".
  asciidoctor "$FILE" -a doctype=book -o - \
    | grep -Eo '(http|https)://[a-zA-Z0-9./?=%_-]*' \
    | sort -u \
    | xargs -r -P 10 -n 1 bash -c "$PATTERNS_DECL; check_url \"\$1\"" _
}
71+
72+
# Scan the requested file; check_url records failures in the flag file.
run_url_checks "$INPUT_FILE"

# Anything other than "1" in the flag file means no URL was reported.
LINK_FLAG=$(cat "$TMP_FILE")
[ "$LINK_FLAG" -eq 1 ] || exit 0

echo "Errors found"
exit 1

scripts/check-modified.sh

Lines changed: 59 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,59 @@
1+
#!/bin/bash
2+
#
3+
# Checks for 404 links in a compiled list of modified books
4+
5+
ERRORS=0

# AsciiDoc files added or changed relative to origin/main
# (--diff-filter=d leaves out deleted files).
FILES=$(git diff --name-only origin/main...HEAD --diff-filter=d -- "*.adoc")

MODULES=$(echo "$FILES" | grep '^modules/.*\.adoc$')
ASSEMBLIES=$(echo "$FILES" | grep '^assemblies/.*\.adoc$')
BOOKS=$(echo "$FILES" | grep -E '^[^/]+\.adoc$')

UPDATED_BOOKS=()
declare -A SEEN_BOOKS=()

#######################################
# Append paths to UPDATED_BOOKS, skipping ones that were already added.
# A leading "./" (as printed by "grep -r .") is removed first so that
# "./book.adoc" and "book.adoc" count as the same file.
# Globals:   UPDATED_BOOKS (written), SEEN_BOOKS (written)
# Arguments: zero or more file paths
#######################################
add_books() {
  local path
  for path in "$@"; do
    path=${path#./}
    if [ -z "$path" ]; then
      continue
    fi
    if [ -n "${SEEN_BOOKS[$path]:-}" ]; then
      continue
    fi
    SEEN_BOOKS[$path]=1
    UPDATED_BOOKS+=( "$path" )
  done
}

if [ -n "$MODULES" ]; then
  # Check for assemblies and books that include modified modules
  while IFS= read -r module; do
    module_name=$(basename "$module")

    # -F: the file name is a literal string, not a regex ("." must not match
    # any character); -w: whole-word match; -l: list file names only.
    mapfile -t found < <(grep -rlwF . --include="*.adoc" --exclude-dir={_artifacts,modules,assemblies} -e "$module_name")
    add_books "${found[@]}"

    mapfile -t found < <(grep -rlwF assemblies --include="*.adoc" --exclude-dir={_artifacts,modules} -e "$module_name")
    add_books "${found[@]}"
  done <<< "$MODULES"
fi

# Check for books that include modified assemblies
if [ -n "$ASSEMBLIES" ]; then
  while IFS= read -r assembly; do
    mapfile -t found < <(grep -rlwF . --include="*.adoc" --exclude-dir={_artifacts,modules,assemblies} -e "$(basename "$assembly")")
    add_books "${found[@]}"
  done <<< "$ASSEMBLIES"
fi

# Check for directly updated books
if [ -n "$BOOKS" ]; then
  while IFS= read -r book; do
    add_books "$book"
  done <<< "$BOOKS"
fi

if [ ${#UPDATED_BOOKS[@]} -eq 0 ]; then
  echo "No modified books. Skipping link check."
  exit 0
fi

# Check links in the compiled list of books; keep going after a failure so
# every affected book is reported in one run.
for f in "${UPDATED_BOOKS[@]}"; do
  echo "Checking: $f"
  if ! ./scripts/check-links.sh "$f"; then
    echo "❌ Link check failed for: $f"
    ERRORS=1
  fi
done

if [ "$ERRORS" -ne 0 ]; then
  echo "One or more link checks failed."
  exit 1
fi

scripts/links.ignore

Lines changed: 15 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,15 @@
1+
# Add ignore link regexes one per line
2+
.*docs\.google\.com.*
3+
.*google\.com.*
4+
.*issues\.redhat\.com.*
5+
.*0\.0\.0\.0.*
6+
.*localhost.*
7+
.*registry\.redhat\.io.*
8+
.*example\.org.*
9+
.*github\.com/example/myrepo\.git
10+
.*fonts\.googleapis\.com.*
11+
.*mixtral-my-project\.apps\.my-cluster\.com.*
12+
.*openshiftapps\.com.*
13+
.*minio-cluster\.local.*
14+
.*codeflare-operator-webhook-service\.redhat-ods-applications\.svc
15+
.*example\.com.*

0 commit comments

Comments
 (0)