# Skip to content
#
# URL Checker
#
# URL Checker #2204
#
# Workflow file for this run

# Checks the links in Markdown files under 5.0/ with markdown-link-check.
# Pull requests check only the Markdown files they add, modify or rename;
# pushes to master and manual runs check every Markdown file under 5.0/.
# Links reported with "Status: 0" (no HTTP response) are retried with curl
# before the job is failed.
name: URL Checker
on:
  pull_request:
    paths:
      - '5.0/**'
  push:
    branches:
      - master
    paths:
      - '5.0/**'
  workflow_dispatch:
jobs:
  markdown-link-check:
    runs-on: ubuntu-latest
    steps:
      - name: Checkout
        uses: actions/checkout@v4.2.2
        with:
          # Full history so that origin/<base branch> is available to git diff.
          fetch-depth: 0
      - name: Install markdown-link-check
        run: npm i -g markdown-link-check@3.14.2
      # Writes the list of Markdown files to check to /tmp/files_to_check.txt
      # and exposes the selection mode ("modified" or "all") as a step output.
      - name: Determine files to check
        id: files
        env:
          # Context values are handed to the script through the environment
          # instead of being expanded into the script text itself.
          EVENT_NAME: ${{ github.event_name }}
          BASE_REF: ${{ github.base_ref }}
        run: |
          if [[ "$EVENT_NAME" == "pull_request" ]]; then
            # A = added, M = modified, R = renamed (listed under the new path).
            # grep exits non-zero when no .md file changed; that is not an error.
            git diff --name-only --diff-filter=AMR "origin/$BASE_REF" -- 5.0/ \
              | grep '\.md$' > /tmp/files_to_check.txt || true
            mode=modified
          else
            # find prints paths starting with "5.0/", so the exclusion has to
            # match node_modules at any depth rather than under "./".
            find 5.0 -name '*.md' -not -path '*/node_modules/*' > /tmp/files_to_check.txt
            mode=all
          fi
          echo "mode=$mode" >> "$GITHUB_OUTPUT"
          echo "Checking files (mode: $mode):"
          cat /tmp/files_to_check.txt
      # Runs markdown-link-check on every listed file and appends all output to
      # /tmp/link-check-output.txt. A failure here does not fail the job
      # directly; the retry step below decides the final result.
      - name: Check URLs
        id: linkcheck
        continue-on-error: true
        run: |
          # Without pipefail the pipeline status would be tee's, hiding failures.
          set -o pipefail
          ERROR_FOUND=0
          while IFS= read -r file; do
            if [ -n "$file" ]; then
              markdown-link-check "$file" \
                --config .github/workflows/config/url-checker-config.json -q 2>&1 \
                | tee -a /tmp/link-check-output.txt || ERROR_FOUND=1
            fi
          done < /tmp/files_to_check.txt
          if [ "$ERROR_FOUND" -eq 1 ]; then
            echo ""
            echo "ERROR: Dead links were found!"
            exit 1
          else
            echo ""
            echo "All links are good!"
          fi
      # Runs only when the link check failed. Exits non-zero (failing the job)
      # unless every reported failure is a status-0 URL that curl can reach.
      - name: Retry status-0 failures with curl
        if: steps.linkcheck.outcome == 'failure'
        run: |
          echo "Retrying status-0 (connection failure) URLs with curl..."
          DEAD_LINKS=0
          USER_AGENT="Mozilla/5.0 (Windows NT 10.0; Win64; x64; rv:64.0) Gecko/20100101 Firefox/64.0"
          # Extract URLs that failed with status 0; each URL is retried once
          # even if it appears in several files.
          grep -oP '\[✖\] \Khttps?://\S+(?= → Status: 0)' /tmp/link-check-output.txt \
            | sort -u > /tmp/status0-urls.txt || true
          if [ ! -s /tmp/status0-urls.txt ]; then
            echo "No status-0 failures found to retry - all failures are genuine dead links."
            exit 1
          fi
          # Check if there were non-status-0 failures too
          NON_ZERO_FAILURES=$(grep '\[✖\]' /tmp/link-check-output.txt | grep -v 'Status: 0' | wc -l)
          if [ "$NON_ZERO_FAILURES" -gt 0 ]; then
            echo "Found $NON_ZERO_FAILURES non-connection failures (these are genuine dead links):"
            grep '\[✖\]' /tmp/link-check-output.txt | grep -v 'Status: 0'
            DEAD_LINKS=1
          fi
          echo ""
          echo "Retrying $(wc -l < /tmp/status0-urls.txt) status-0 URL(s) with curl..."
          echo ""
          while IFS= read -r url; do
            if [ -n "$url" ]; then
              # The fallback is assigned outside the command substitution: curl
              # still prints its -w output when it fails, so echoing "000"
              # inside the substitution would produce "000000".
              HTTP_CODE=$(curl -sS -o /dev/null -w "%{http_code}" \
                --max-time 30 --retry 2 --retry-delay 5 -L \
                -A "$USER_AGENT" "$url" 2>/dev/null) || HTTP_CODE="000"
              if [ "$HTTP_CODE" -ge 200 ] && [ "$HTTP_CODE" -lt 400 ]; then
                echo "[✓] $url → curl status: $HTTP_CODE (OK)"
              else
                echo "[✖] $url → curl status: $HTTP_CODE (FAILED)"
                DEAD_LINKS=1
              fi
            fi
          done < /tmp/status0-urls.txt
          echo ""
          if [ "$DEAD_LINKS" -eq 1 ]; then
            echo "ERROR: Dead links confirmed after retry!"
            exit 1
          else
            echo "All status-0 URLs passed curl retry - links are alive!"
          fi