# URL Checker #2204
# Workflow file for this run
# This file contains hidden or bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
# Learn more about bidirectional Unicode characters
# Workflow header and triggers.
# The table-cell wrappers ("| ... | |") left over from the web view have been
# removed and the YAML nesting restored so the file parses as a workflow.
name: URL Checker

on:
  # Pull requests: run only when something under 5.0/ changes.
  pull_request:
    paths:
      - '5.0/**'
  # Pushes: run only for the master branch, and only when 5.0/ changes.
  push:
    branches:
      - master
    paths:
      - '5.0/**'
  # Allows the workflow to be started manually.
  workflow_dispatch:
# Single job: check links in Markdown files under 5.0/ with markdown-link-check,
# then re-test "Status: 0" (connection failure) results with curl before
# declaring them dead.
jobs:
  markdown-link-check:
    runs-on: ubuntu-latest
    steps:
      - name: Checkout
        uses: actions/checkout@v4.2.2
        with:
          # Full history so that origin/<base branch> is available to `git diff` below.
          fetch-depth: 0

      - name: Install markdown-link-check
        run: npm i -g markdown-link-check@3.14.2

      # Writes the list of Markdown files to check (one path per line) to
      # /tmp/files_to_check.txt and exposes `mode` (modified|all) as a step output.
      - name: Determine files to check
        id: files
        env:
          # Context values are passed through the environment instead of being
          # interpolated into the script text, so a crafted ref name cannot
          # inject shell code.
          EVENT_NAME: ${{ github.event_name }}
          BASE_REF: ${{ github.base_ref }}
        run: |
          if [[ "$EVENT_NAME" == "pull_request" ]]; then
            # Run git on its own so that a git failure aborts the step instead of
            # being masked by the `|| true` that tolerates "no .md file changed".
            # core.quotepath=off keeps non-ASCII file names unquoted so the
            # '\.md$' filter still matches them.
            changed=$(git -c core.quotepath=off diff --name-only --diff-filter=AM "origin/${BASE_REF}" -- 5.0/)
            printf '%s\n' "$changed" | grep '\.md$' > /tmp/files_to_check.txt || true
            mode=modified
          else
            # find prints paths starting with "5.0/", so the exclusion pattern must
            # match node_modules at any depth ('./node_modules/*' never matched).
            find 5.0 -name '*.md' -not -path '*/node_modules/*' > /tmp/files_to_check.txt
            mode=all
          fi
          echo "mode=${mode}" >> "$GITHUB_OUTPUT"
          echo "Checking files (mode: ${mode}):"
          cat /tmp/files_to_check.txt

      # Runs markdown-link-check on every listed file and appends all output to
      # /tmp/link-check-output.txt for the retry step.
      - name: Check URLs
        id: linkcheck
        # A failure here is not final: the next step re-tests status-0 results.
        continue-on-error: true
        run: |
          # Without pipefail the pipeline below would report tee's exit status
          # and hide a failing markdown-link-check.
          set -o pipefail
          ERROR_FOUND=0
          while IFS= read -r file; do
            if [ -n "$file" ]; then
              markdown-link-check "$file" --config .github/workflows/config/url-checker-config.json -q 2>&1 \
                | tee -a /tmp/link-check-output.txt || ERROR_FOUND=1
            fi
          done < /tmp/files_to_check.txt
          if [ "$ERROR_FOUND" -eq 1 ]; then
            echo ""
            echo "ERROR: Dead links were found!"
            exit 1
          else
            echo ""
            echo "All links are good!"
          fi

      # Runs only when the check step failed. Fails the job if any failure was
      # not a status-0 result, or if a status-0 URL still fails when fetched
      # with curl.
      - name: Retry status-0 failures with curl
        if: steps.linkcheck.outcome == 'failure'
        run: |
          echo "Retrying status-0 (connection failure) URLs with curl..."
          DEAD_LINKS=0
          # Extract URLs that failed with status 0. De-duplicated so that a link
          # appearing in several files is retried only once.
          grep -oP '\[✖\] \Khttps?://\S+(?= → Status: 0)' /tmp/link-check-output.txt \
            | sort -u > /tmp/status0-urls.txt || true
          if [ ! -s /tmp/status0-urls.txt ]; then
            echo "No status-0 failures found to retry - all failures are genuine dead links."
            exit 1
          fi
          # Check if there were non-status-0 failures too.
          # grep -c exits 1 when the count is 0; `|| true` keeps `bash -e` from
          # aborting the step in that case (the "0" is still captured).
          NON_ZERO_FAILURES=$(grep '\[✖\]' /tmp/link-check-output.txt | grep -vc 'Status: 0' || true)
          if [ "$NON_ZERO_FAILURES" -gt 0 ]; then
            echo "Found $NON_ZERO_FAILURES non-connection failures (these are genuine dead links):"
            grep '\[✖\]' /tmp/link-check-output.txt | grep -v 'Status: 0'
            DEAD_LINKS=1
          fi
          echo ""
          echo "Retrying $(wc -l < /tmp/status0-urls.txt) status-0 URL(s) with curl..."
          echo ""
          while IFS= read -r url; do
            if [ -n "$url" ]; then
              # curl's --write-out already prints a code (000 when no response was
              # received) even when curl itself fails, so the fallback must
              # replace the captured value rather than be appended to it;
              # otherwise the log shows "000000" or e.g. "200000".
              HTTP_CODE=$(curl -sS -o /dev/null -w "%{http_code}" --max-time 30 --retry 2 --retry-delay 5 -L -A "Mozilla/5.0 (Windows NT 10.0; Win64; x64; rv:64.0) Gecko/20100101 Firefox/64.0" "$url" 2>/dev/null) || HTTP_CODE="000"
              if [ "$HTTP_CODE" -ge 200 ] && [ "$HTTP_CODE" -lt 400 ]; then
                echo "[✓] $url → curl status: $HTTP_CODE (OK)"
              else
                echo "[✖] $url → curl status: $HTTP_CODE (FAILED)"
                DEAD_LINKS=1
              fi
            fi
          done < /tmp/status0-urls.txt
          echo ""
          if [ "$DEAD_LINKS" -eq 1 ]; then
            echo "ERROR: Dead links confirmed after retry!"
            exit 1
          else
            echo "All status-0 URLs passed curl retry - links are alive!"
          fi