1- name : Smart Broken Link Checker
1+ name : Broken Link Checker
22
33on :
44 pull_request :
@@ -10,93 +10,97 @@ permissions:
1010 contents : read
1111
jobs:
  # Runs lychee over Markdown files, drops findings whose target only appears
  # inside an HTML comment of the reporting file, publishes the filtered report
  # as an artifact, and fails the job when "[ERROR]" entries remain.
  markdown-link-check:
    name: Check Markdown Broken Links
    runs-on: ubuntu-latest

    steps:
      - name: Checkout Repo
        uses: actions/checkout@v4
        with:
          # Full history: the three-dot diff below needs the merge base with
          # the PR's base branch.
          fetch-depth: 0

      # Publishes the newline-separated list of changed *.md paths as the
      # step output `md_files` (empty when the PR touches no Markdown).
      - name: Get Added/Modified Markdown Files (PR only)
        id: changed-files
        if: github.event_name == 'pull_request'
        env:
          # Handed over through the environment so the ref name is data for
          # the shell, not part of the script text.
          BASE_REF: ${{ github.base_ref }}
        run: |
          git fetch origin "$BASE_REF"
          # --diff-filter=d leaves out deleted paths, which no longer exist
          # on disk and therefore cannot be checked.
          files=$(git diff --name-only --diff-filter=d "origin/${BASE_REF}...HEAD" | grep '\.md$' || true)
          {
            echo "md_files<<EOF"
            echo "$files"
            echo "EOF"
          } >> "$GITHUB_OUTPUT"

      # NOTE(review): lychee is normally installed as a standalone binary;
      # confirm that `npx lychee` resolves to the intended link checker.
      - name: Run Lychee (PR)
        if: github.event_name == 'pull_request' && steps.changed-files.outputs.md_files != ''
        env:
          # File names come from the pull request, so they are passed as an
          # environment variable instead of being pasted into the script.
          MD_FILES: ${{ steps.changed-files.outputs.md_files }}
        run: |
          # One array element per line keeps every path a single argument,
          # including paths that contain spaces.
          mapfile -t md_files <<< "$MD_FILES"
          # `|| true`: a non-zero lychee exit must not stop the job here; the
          # final step decides pass/fail from the filtered report.
          npx lychee --verbose --exclude-mail --no-progress --exclude '^https?://' "${md_files[@]}" > lychee_output.txt || true

      - name: Run Lychee (Manual)
        if: github.event_name == 'workflow_dispatch'
        run: |
          # The glob is quoted so it reaches lychee unexpanded.
          npx lychee --verbose --exclude-mail --no-progress --exclude '^https?://' '**/*.md' > lychee_output.txt || true

      # Copies lychee_output.txt to lychee_final_output.txt, keeping only
      # per-file header lines and `file://… |` result lines, and skipping
      # result lines whose target name occurs inside an HTML comment of the
      # file named by the most recent header.
      - name: Filter Commented-Out Links from Output
        run: |
          lychee_output="lychee_output.txt"
          final_output="lychee_final_output.txt"
          : > "$final_output"

          # Neither lychee step ran (for example a PR without Markdown
          # changes): leave the final report empty.
          if [ ! -f "$lychee_output" ]; then
            echo "No $lychee_output found; nothing to filter."
            exit 0
          fi

          current_file=""

          while IFS= read -r line; do
            # NOTE(review): verify this header pattern against the output of
            # the lychee version actually in use.
            if [[ "$line" =~ ^Errors\ in\ (.*)\ \[ERROR\]?$ ]]; then
              current_file="${BASH_REMATCH[1]}"
              echo "$line" >> "$final_output"
              continue
            fi

            if [[ "$line" =~ file://(.*)\ \| ]]; then
              full_path="${BASH_REMATCH[1]}"
              broken_link=$(basename "$full_path")

              if [ -n "$broken_link" ] && [ -f "$current_file" ]; then
                # (?:(?!-->).)*? cannot step over a comment terminator, so the
                # name must sit between "<!--" and the next "-->".
                # \Q...\E makes the file name match literally.
                comment_pattern="(?s)<!--(?:(?!-->).)*?\Q${broken_link}\E.*?-->"
                if grep -qzP -- "$comment_pattern" "$current_file"; then
                  echo "Skipping commented-out link: $broken_link in $current_file"
                  continue
                fi
              fi

              echo "$line" >> "$final_output"
            fi
          done < "$lychee_output"

          echo "Filtered Lychee output saved to $final_output"

      - name: Save Filtered Errors to Markdown File
        run: |
          mkdir -p lychee
          echo "# 🔗 Broken Link Report" > lychee/out.md
          echo "" >> lychee/out.md
          # -s: only append when the filtered report is non-empty.
          if [ -s lychee_final_output.txt ]; then
            cat lychee_final_output.txt >> lychee/out.md
          else
            echo "✅ No non-commented broken links found." >> lychee/out.md
          fi

      - name: Upload Filtered Report as Artifact
        uses: actions/upload-artifact@v4
        with:
          name: filtered-link-report
          path: lychee/out.md

      # The job result is decided here: any "[ERROR]" text left in the
      # filtered report fails the run.
      - name: Fail if Real Broken Links Remain
        run: |
          if grep -q "\[ERROR\]" lychee_final_output.txt; then
            echo "❌ Found non-commented broken links:"
            cat lychee_final_output.txt
            exit 1
          else
            echo "✅ No real broken links found."
          fi
0 commit comments