diff --git a/.github/workflows/check-links.yml b/.github/workflows/check-links.yml
new file mode 100644
index 000000000..b32cb3c43
--- /dev/null
+++ b/.github/workflows/check-links.yml
@@ -0,0 +1,33 @@
+name: Check links in AsciiDoc
+
+on: [push, pull_request]
+
+jobs:
+  check-links:
+    name: Check links in modified files
+    runs-on: ubuntu-latest
+
+    steps:
+      # check-modified.sh diffs origin/main...HEAD, so the full history
+      # (not the default shallow clone) is fetched.
+      - uses: actions/checkout@v4
+        with:
+          fetch-depth: 0
+
+      - name: Install Asciidoctor
+        run: |
+          sudo apt-get update
+          sudo apt-get install -y asciidoctor
+
+      - name: Make scripts executable
+        run: |
+          chmod +x scripts/check-links.sh
+          chmod +x scripts/check-modified.sh
+
+      - name: Fetch base branch
+        run: git fetch origin main
+
+      - name: Check links in modified files
+        shell: bash
+        run: |
+          scripts/check-modified.sh
diff --git a/scripts/check-links.sh b/scripts/check-links.sh
new file mode 100644
index 000000000..c5a2e9464
--- /dev/null
+++ b/scripts/check-links.sh
@@ -0,0 +1,126 @@
+#!/bin/bash
+#
+# Checks for broken links in an AsciiDoc file using Asciidoctor and curl.
+#
+# Usage: check-links.sh FILE.adoc
+#
+# The file is rendered with Asciidoctor and every http(s) URL found in the
+# output is requested with curl. URLs matching a regex listed in links.ignore
+# (in the same directory as this script) are skipped.
+# Exits 1 if a broken link is found or a precondition is not met.
+
+usage() {
+  echo "Usage: $0 FILE.adoc" >&2
+  exit 1
+}
+
+# Parse arguments
+if [ $# -eq 0 ]; then
+  usage
+fi
+
+INPUT_FILE="$1"
+
+if [ ! -f "$INPUT_FILE" ]; then
+  echo "Error: File not found: $INPUT_FILE" >&2
+  exit 1
+fi
+
+# Check dependencies
+if ! asciidoctor -v >/dev/null 2>&1; then
+  echo "Error: Asciidoctor is not installed" >&2
+  exit 1
+fi
+
+if ! command -v curl >/dev/null 2>&1; then
+  echo "Error: curl is not installed" >&2
+  exit 1
+fi
+
+# Load ignore patterns from external file
+IGNORE_FILE="$(dirname "$0")/links.ignore"
+
+if [ ! -f "$IGNORE_FILE" ]; then
+  echo "Error: Missing ignore patterns file: $IGNORE_FILE" >&2
+  exit 1
+fi
+
+# Blank lines and '#' comments are not patterns. An empty pattern would match
+# every URL and silently disable the whole check.
+SKIP_LINE_RE='^[[:space:]]*(#|$)'
+IGNORE_PATTERNS=()
+while IFS= read -r LINE || [ -n "$LINE" ]; do
+  if [[ "$LINE" =~ $SKIP_LINE_RE ]]; then
+    continue
+  fi
+  IGNORE_PATTERNS+=( "$LINE" )
+done < "$IGNORE_FILE"
+
+# Bash cannot export arrays, so the array definition is serialized here and
+# replayed in every child shell started by xargs.
+PATTERNS_DECL=$(declare -p IGNORE_PATTERNS)
+
+# Create temp file for flagging broken links. URLs are checked in child
+# processes, which report a failure by writing "1" to this file.
+TMP_FILE=$(mktemp) || exit 1
+trap 'rm -f -- "$TMP_FILE"' EXIT
+echo "0" > "$TMP_FILE"
+
+#######################################
+# Checks one URL and flags it when it is broken.
+# Globals:
+#   IGNORE_PATTERNS (read) - regexes of URLs to skip
+#   TMP_FILE (read) - flag file, set to "1" for a broken URL
+# Arguments:
+#   $1 - URL to check
+# Outputs:
+#   Prints a message to stdout for a broken URL
+#######################################
+check_url() {
+  local url=$1
+  local pattern status
+
+  # Drop one trailing punctuation character, if any
+  url=${url%[.,;:?!\]\)]}
+
+  for pattern in "${IGNORE_PATTERNS[@]}"; do
+    if [[ "$url" =~ $pattern ]]; then
+      return 0
+    fi
+  done
+
+  status=$(curl -Ls -o /dev/null -w "%{http_code}" --max-time 5 --connect-timeout 2 "$url")
+
+  # Accepted: 2xx, 3xx, 403, and 000 (curl received no HTTP response)
+  if [[ "$status" != "000" && "$status" != "403" && ! "$status" =~ ^(2|3)[0-9]{2}$ ]]; then
+    printf 'Invalid URL (HTTP status %s): \n\033[31m%s\033[0m\n' "$status" "$url"
+    echo "1" > "$TMP_FILE"
+  fi
+}
+
+export TMP_FILE
+export -f check_url
+
+#######################################
+# Renders a file and checks every URL found in the output.
+# Globals:
+#   PATTERNS_DECL (read)
+# Arguments:
+#   $1 - AsciiDoc file to check
+#######################################
+run_url_checks() {
+  local file="$1"
+  printf '\033[32mChecking: %s\033[0m\n' "$file"
+  # -r: run nothing when no URL was found; "_" fills $0 so the URL is $1
+  asciidoctor "$file" -a doctype=book -o - \
+    | grep -Eo '(http|https)://[a-zA-Z0-9./?=%_-]*' \
+    | sort -u \
+    | xargs -r -P 10 -n 1 bash -c "$PATTERNS_DECL; check_url \"\$1\"" _
+}
+
+run_url_checks "$INPUT_FILE"
+
+if [ "$(cat "$TMP_FILE")" -eq 1 ]; then
+  echo "Errors found"
+  exit 1
+fi
diff --git a/scripts/check-modified.sh b/scripts/check-modified.sh
new file mode 100644
index 000000000..cea9a6dca
--- /dev/null
+++ b/scripts/check-modified.sh
@@ -0,0 +1,112 @@
+#!/bin/bash
+#
+# Checks for broken links in the books affected by the current changes.
+#
+# A book is checked when it was modified itself or when it mentions the file
+# name of a modified module or assembly. Assemblies that mention a modified
+# module are checked too. Must be run from the repository root.
+# Exits 1 if a link check fails or the modified files cannot be listed.
+
+ERRORS=0
+CHECK_LINKS="$(dirname "$0")/check-links.sh"
+
+# Abort if the diff cannot be computed (for example, origin/main is missing).
+# An empty list would otherwise be reported as "No modified books".
+if ! FILES=$(git diff --name-only origin/main...HEAD --diff-filter=d -- "*.adoc"); then
+  echo "Error: Unable to list files modified since origin/main" >&2
+  exit 1
+fi
+
+MODULES=$(grep '^modules/.*\.adoc$' <<< "$FILES")
+ASSEMBLIES=$(grep '^assemblies/.*\.adoc$' <<< "$FILES")
+BOOKS=$(grep -E '^[^/]+\.adoc$' <<< "$FILES")
+
+UPDATED_BOOKS=()
+declare -A SEEN_BOOKS=()
+
+#######################################
+# Adds a file to UPDATED_BOOKS unless it is already listed.
+# Globals:
+#   UPDATED_BOOKS (written), SEEN_BOOKS (written)
+# Arguments:
+#   $1 - path of the file to add
+#######################################
+add_book() {
+  # grep prints matches under "." as "./name"; drop the prefix so that one
+  # file is not queued under two spellings.
+  local book=${1#./}
+  if [[ -z "$book" || -n "${SEEN_BOOKS[$book]:-}" ]]; then
+    return 0
+  fi
+  SEEN_BOOKS[$book]=1
+  UPDATED_BOOKS+=( "$book" )
+}
+
+#######################################
+# Adds every .adoc file in a directory tree that mentions a file name.
+# Arguments:
+#   $1 - directory to search
+#   $2 - path of the file whose base name is searched for
+#   $3... - names of directories to skip
+#######################################
+add_books_including() {
+  local root=$1
+  local name
+  name=$(basename "$2")
+  shift 2
+
+  local -a excludes=()
+  local dir
+  for dir in "$@"; do
+    excludes+=( "--exclude-dir=$dir" )
+  done
+
+  local match
+  # -F: the file name is a literal string, not a regular expression
+  while IFS= read -r match; do
+    add_book "$match"
+  done < <(grep -rlwF --include="*.adoc" "${excludes[@]}" -e "$name" -- "$root")
+}
+
+# Check for assemblies and books that include modified modules
+if [ -n "$MODULES" ]; then
+  while IFS= read -r module; do
+    add_books_including . "$module" _artifacts modules assemblies
+    if [ -d assemblies ]; then
+      add_books_including assemblies "$module" _artifacts modules
+    fi
+  done <<< "$MODULES"
+fi
+
+# Check for books that include modified assemblies
+if [ -n "$ASSEMBLIES" ]; then
+  while IFS= read -r assembly; do
+    add_books_including . "$assembly" _artifacts modules assemblies
+  done <<< "$ASSEMBLIES"
+fi
+
+# Check for directly updated books
+if [ -n "$BOOKS" ]; then
+  while IFS= read -r book; do
+    add_book "$book"
+  done <<< "$BOOKS"
+fi
+
+if [ ${#UPDATED_BOOKS[@]} -eq 0 ]; then
+  echo "No modified books. Skipping link check."
+  exit 0
+fi
+
+# Check links in the compiled list of books
+for f in "${UPDATED_BOOKS[@]}"; do
+  echo "Checking: $f"
+  if ! "$CHECK_LINKS" "$f"; then
+    echo "❌ Link check failed for: $f"
+    ERRORS=1
+  fi
+done
+
+if [ "$ERRORS" -ne 0 ]; then
+  echo "One or more link checks failed."
+  exit 1
+fi
diff --git a/scripts/links.ignore b/scripts/links.ignore
new file mode 100644
index 000000000..29095e508
--- /dev/null
+++ b/scripts/links.ignore
@@ -0,0 +1,16 @@
+# Link regexes to ignore, one per line (matched anywhere in the URL).
+# Blank lines and lines starting with '#' are skipped.
+.*docs\.google\.com.*
+.*google\.com.*
+.*issues\.redhat\.com.*
+.*0\.0\.0\.0.*
+.*localhost.*
+.*registry\.redhat\.io.*
+.*example\.org.*
+.*github\.com/example/myrepo\.git
+.*fonts\.googleapis\.com.*
+.*mixtral-my-project\.apps\.my-cluster\.com.*
+.*openshiftapps\.com.*
+.*minio-cluster\.local.*
+.*codeflare-operator-webhook-service\.redhat-ods-applications\.svc
+.*example\.com.*