# Workflow display name.
name: Smart Broken Link Checker
22
33on :
44 pull_request :
@@ -10,51 +10,93 @@ permissions:
1010 contents : read
1111
jobs:
  # Runs lychee over the target Markdown files, then re-checks every file that
  # reported failures with its HTML comments stripped, so that links appearing
  # only inside <!-- ... --> comments do not fail the build.
  smart-link-check:
    name: Filtered Markdown Broken Links
    runs-on: ubuntu-latest

    steps:
      - name: Checkout Repository
        uses: actions/checkout@v4
        with:
          # Full history so the PR diff against the base branch can be computed.
          fetch-depth: 0

      - name: Install lychee
        run: |
          set -euo pipefail
          # -f makes curl fail on an HTTP error instead of saving the error page.
          # NOTE(review): asset name assumed from the lychee release naming
          # scheme (target-triple tarball) — confirm on the releases page.
          curl -fsSL \
            https://github.com/lycheeverse/lychee/releases/latest/download/lychee-x86_64-unknown-linux-gnu.tar.gz \
            -o "$RUNNER_TEMP/lychee.tar.gz"
          # Unpack outside the workspace so nothing from the archive is picked
          # up by the Markdown file search below.
          mkdir -p "$RUNNER_TEMP/lychee-dl" "$RUNNER_TEMP/lychee-bin"
          tar -xzf "$RUNNER_TEMP/lychee.tar.gz" -C "$RUNNER_TEMP/lychee-dl"
          # Locate the binary wherever it sits inside the archive.
          lychee_path=$(find "$RUNNER_TEMP/lychee-dl" -type f -name lychee | head -n 1)
          if [[ -z "$lychee_path" ]]; then
            echo "lychee binary not found in downloaded archive" >&2
            exit 1
          fi
          install -m 0755 "$lychee_path" "$RUNNER_TEMP/lychee-bin/lychee"
          echo "$RUNNER_TEMP/lychee-bin" >> "$GITHUB_PATH"

      - name: Determine Target Files
        id: target-files
        env:
          # Passed through the environment rather than interpolated into the
          # script, so the values are never parsed as shell code.
          EVENT_NAME: ${{ github.event_name }}
          BASE_REF: ${{ github.base_ref }}
        run: |
          set -euo pipefail
          if [[ "$EVENT_NAME" == "pull_request" ]]; then
            git fetch origin "$BASE_REF"
            # Added/modified/renamed Markdown only; deleted files cannot be
            # checked. grep exits 1 on "no match", which is not an error here.
            git diff --name-only --diff-filter=AMR "origin/$BASE_REF...HEAD" \
              | { grep '\.md$' || true; } > all-md-files.txt
          else
            find . -type f -name '*.md' -not -path './.git/*' > all-md-files.txt
          fi
          # all-md-files.txt (one path per line) is the list later steps read;
          # the space-joined output is kept only as a convenience summary.
          echo "files=$(paste -sd ' ' all-md-files.txt)" >> "$GITHUB_OUTPUT"

      - name: Run Lychee (Initial)
        env:
          GITHUB_TOKEN: ${{ secrets.GITHUB_TOKEN }}
        run: |
          mkdir -p lychee-output
          # Read paths line by line so names containing spaces stay intact.
          mapfile -t md_files < all-md-files.txt
          if [[ ${#md_files[@]} -eq 0 ]]; then
            echo "No Markdown files to check."
            echo '{}' > lychee-output/report.json
            echo "No Markdown files to check." > lychee-output/report.txt
            exit 0
          fi
          # Broken links must not abort the job here: the failing files are
          # re-checked after comment stripping, and the last step decides.
          # NOTE(review): lychee documents exit code 2 for link-check failures;
          # any other non-zero status is treated as a real error — confirm.
          # NOTE(review): --exclude-mail is deprecated in recent lychee releases
          # — confirm the installed version still accepts it.
          status=0
          lychee --verbose --no-progress --exclude-mail --format json \
            "${md_files[@]}" > lychee-output/report.json || status=$?
          if [[ $status -ne 0 && $status -ne 2 ]]; then
            echo "lychee exited with unexpected status $status" >&2
            exit "$status"
          fi
          # Also produce plain-text report for human reading
          lychee --verbose --no-progress --exclude-mail \
            "${md_files[@]}" > lychee-output/report.txt || true

      - name: Extract Failed Files
        id: extract-failed
        run: |
          set -euo pipefail
          # Collect the input files that have at least one failed link.
          # NOTE(review): assumes the JSON report keys failures by input file
          # under error_map (newer lychee) or fail_map (older) — confirm
          # against the installed version's output.
          jq -r '(.error_map // .fail_map // {}) | keys[]' lychee-output/report.json \
            | sort -u > failed-files.txt
          if [[ -s failed-files.txt ]]; then
            echo "files_to_sanitize=$(paste -sd ' ' failed-files.txt)" >> "$GITHUB_OUTPUT"
          fi

      - name: Sanitize Failed Files (Remove Comments)
        if: steps.extract-failed.outputs.files_to_sanitize != ''
        run: |
          mkdir -p sanitized_md
          while IFS= read -r file; do
            [ -f "$file" ] || continue
            mkdir -p "sanitized_md/$(dirname "$file")"
            # -0777 slurps the whole file so multi-line comments are removed.
            perl -0777 -pe 's/<!--.*?-->//gs' "$file" > "sanitized_md/$file"
          done < failed-files.txt

      - name: Re-run Lychee on Sanitized Files
        if: steps.extract-failed.outputs.files_to_sanitize != ''
        env:
          GITHUB_TOKEN: ${{ secrets.GITHUB_TOKEN }}
        run: |
          # Always create the result file so later steps can rely on it.
          : > final-failures.txt
          while IFS= read -r file; do
            [ -f "sanitized_md/$file" ] || continue
            echo "🔍 Rechecking: sanitized_md/$file"
            # Check the sanitized content at the file's original location so
            # relative links resolve against the real directory tree, then
            # restore the original content.
            backup=$(mktemp)
            cp -p "$file" "$backup"
            cp "sanitized_md/$file" "$file"
            # stdin is redirected so lychee cannot consume the loop's input.
            lychee --verbose --no-progress --exclude-mail "$file" < /dev/null \
              || echo "$file" >> final-failures.txt
            cp -p "$backup" "$file"
            rm -f "$backup"
          done < failed-files.txt

      - name: Upload Lychee Reports
        # Upload whatever was produced even if an earlier step failed.
        if: ${{ !cancelled() }}
        uses: actions/upload-artifact@v4
        with:
          name: lychee-original-reports
          path: lychee-output

      - name: Upload Sanitized Report Failures
        if: steps.extract-failed.outputs.files_to_sanitize != ''
        uses: actions/upload-artifact@v4
        with:
          name: lychee-sanitized-failures
          path: |
            final-failures.txt
            sanitized_md

      - name: Fail if Broken Links Remain
        # Runs on the normal (successful) path: this step is the one that turns
        # remaining broken links into a job failure.
        if: steps.extract-failed.outputs.files_to_sanitize != ''
        run: |
          if [ -s final-failures.txt ]; then
            echo "🚫 Broken links still exist after comment-stripping:"
            cat final-failures.txt
            exit 1
          fi
0 commit comments