Skip to content

Commit eac4c9f

Browse files
Update Broken-links-checker-final.yml
1 parent 0f7116c commit eac4c9f

File tree

1 file changed

+81
-39
lines changed

1 file changed

+81
-39
lines changed
Lines changed: 81 additions & 39 deletions
Original file line numberDiff line numberDiff line change
@@ -1,4 +1,4 @@
1-
name: Broken Link Checker
1+
name: Smart Broken Link Checker
22

33
on:
44
pull_request:
@@ -10,51 +10,93 @@ permissions:
1010
contents: read
1111

1212
jobs:
13-
markdown-link-check:
14-
name: Check Markdown Broken Links
13+
smart-link-check:
14+
name: Filtered Markdown Broken Links
1515
runs-on: ubuntu-latest
1616

1717
steps:
18-
- name: Checkout Repo
18+
- name: Checkout Repository
1919
uses: actions/checkout@v4
2020
with:
2121
fetch-depth: 0
2222

23-
- name: Get Added/Modified Markdown Files (PR only)
24-
id: changed-files
25-
if: github.event_name == 'pull_request'
23+
- name: Install lychee
2624
run: |
27-
git fetch origin ${{ github.base_ref }}
28-
files=$(git diff --name-only origin/${{ github.base_ref }}...HEAD | grep '\.md$' || true)
29-
echo "md_files<<EOF" >> $GITHUB_OUTPUT
30-
echo "$files" >> $GITHUB_OUTPUT
31-
echo "EOF" >> $GITHUB_OUTPUT
32-
- name: Check Broken Links in Added/Modified Files (PR)
33-
if: github.event_name == 'pull_request' && steps.changed-files.outputs.md_files != ''
34-
uses: lycheeverse/[email protected]
35-
with:
36-
args: >
37-
--verbose --exclude-mail --no-progress --exclude ^https?://
38-
${{ steps.changed-files.outputs.md_files }}
39-
env:
40-
GITHUB_TOKEN: ${{ secrets.GITHUB_TOKEN }}
41-
42-
- name: Preprocess All Markdown Files (strip comments)
43-
if: github.event_name == 'workflow_dispatch'
25+
curl -fsSL https://github.com/lycheeverse/lychee/releases/latest/download/lychee-x86_64-unknown-linux-gnu.tar.gz -o lychee.tar.gz  # lychee releases ship per-target tarballs; -f makes an HTTP error fail here instead of saving an error page
26+
mkdir -p lychee-bin && tar -xzf lychee.tar.gz -C lychee-bin  # unpack into a dedicated directory that is added to PATH below
27+
find lychee-bin -mindepth 2 -type f -name lychee -exec mv {} lychee-bin/ \; && chmod +x lychee-bin/lychee  # tolerate an archive that nests the binary in a subdirectory
28+
echo "$PWD/lychee-bin" >> $GITHUB_PATH
29+
30+
- name: Determine Target Files
31+
id: target-files
32+
run: |
33+
if [[ "${{ github.event_name }}" == "pull_request" ]]; then  # PR run: only Markdown files changed relative to the base branch
34+
git fetch origin ${{ github.base_ref }}
35+
files=$(git diff --name-only origin/${{ github.base_ref }}...HEAD | grep '\.md$' || true)  # `|| true`: grep exits 1 when no path matches
36+
else
37+
files=$(find . -type f -name "*.md")  # any other trigger: every Markdown file under the working directory
38+
fi
39+
echo "$files" > all-md-files.txt
40+
echo "files=$(cat all-md-files.txt | paste -sd ' ' -)" >> $GITHUB_OUTPUT  # newline-separated list joined with spaces into the `files` step output
41+
42+
- name: Run Lychee (Initial)
43+
run: |
44+
mkdir -p lychee-output
45+
lychee --verbose --no-progress --exclude-mail --format json \
46+
${{ steps.target-files.outputs.files }} > lychee-output/report.json || true  # lychee exits non-zero on broken links (or an empty file list); keep going so the report can be triaged below
47+
# Also produce plain-text report for human reading
48+
lychee --verbose --no-progress --exclude-mail \
49+
${{ steps.target-files.outputs.files }} > lychee-output/report.txt || true
50+
51+
- name: Extract Failed Files
52+
id: extract-failed
53+
run: |
54+
mkdir -p sanitize-targets
55+
jq -r '(.error_map // .fail_map // {}) | keys[]' lychee-output/report.json | sort -u > failed-files.txt  # lychee's JSON report maps each input file to its failed links (error_map; fail_map in older releases)
56+
if [[ -s failed-files.txt ]]; then
57+
echo "files_to_sanitize=$(cat failed-files.txt | paste -sd ' ' -)" >> $GITHUB_OUTPUT
58+
fi
59+
60+
- name: Sanitize Failed Files (Remove Comments)
61+
if: steps.extract-failed.outputs.files_to_sanitize != ''
4462
run: |
45-
mkdir -p cleaned_md
46-
find . -name "*.md" ! -path "./cleaned_md/*" | while read file; do
47-
mkdir -p "cleaned_md/$(dirname "$file")"
48-
sed -E '/<!--/,/-->/d' "$file" > "cleaned_md/$file"
49-
done
50-
51-
- name: Check Broken Links in Entire Repo (Manual)
52-
if: github.event_name == 'workflow_dispatch'
53-
uses: lycheeverse/[email protected]
63+
mkdir -p sanitized_md
64+
while IFS= read -r file; do
65+
[ -f "$file" ] || continue
66+
mkdir -p "sanitized_md/$(dirname "$file")"
67+
perl -0777 -pe 's/<!--.*?-->//gs' "$file" > "sanitized_md/$file"
68+
done < failed-files.txt
69+
70+
- name: Re-run Lychee on Sanitized Files
71+
if: steps.extract-failed.outputs.files_to_sanitize != ''
72+
run: |
73+
mkdir -p lychee-output-sanitized
74+
while IFS= read -r file; do
75+
[ -f "sanitized_md/$file" ] || continue
76+
echo "🔍 Rechecking: sanitized_md/$file"
77+
lychee --verbose --no-progress --exclude-mail "sanitized_md/$file" || echo "$file" >> final-failures.txt
78+
done < failed-files.txt
79+
80+
- name: Upload Lychee Reports
81+
uses: actions/upload-artifact@v4
5482
with:
55-
args: >
56-
--verbose --exclude-mail --no-progress --exclude ^https?://
57-
cleaned_md/**/*.md
58-
output: lychee/out.md
59-
env:
60-
GITHUB_TOKEN: ${{ secrets.GITHUB_TOKEN }}
83+
name: lychee-original-reports
84+
path: lychee-output
85+
86+
- name: Upload Sanitized Report Failures
87+
if: steps.extract-failed.outputs.files_to_sanitize != ''
88+
uses: actions/upload-artifact@v4
89+
with:
90+
name: lychee-sanitized-failures
91+
path: |
92+
final-failures.txt
93+
sanitized_md
94+
95+
- name: Fail if Broken Links Remain
96+
if: steps.extract-failed.outputs.files_to_sanitize != ''  # no failure() here: earlier steps tolerate lychee errors, so this step is the only gate
97+
run: |
98+
if [ -s final-failures.txt ]; then
99+
echo "🚫 Broken links still exist after comment-stripping:"
100+
cat final-failures.txt
101+
exit 1
102+
fi

0 commit comments

Comments
 (0)