Skip to content

Commit 7fa1a2f

Browse files
Update Broken-links-checker-final.yml
1 parent 41afa74 commit 7fa1a2f

File tree

1 file changed

+83
-69
lines changed

1 file changed

+83
-69
lines changed
Lines changed: 83 additions & 69 deletions
Original file line numberDiff line numberDiff line change
@@ -1,71 +1,85 @@
1-
name: Broken Link Checker (lychee with comment strip)
1+
name: Broken Link Checker
2+
23
on:
3-
pull_request:
4-
paths:
5-
- '**/*.md'
6-
workflow_dispatch:
7-
permissions:
8-
contents: read
4+
# Triggers (children of `on:`): pull requests that touch any Markdown
# file, plus manual runs started from the Actions tab.
pull_request:
  paths:
    - '**/*.md'
workflow_dispatch:
8+
99
jobs:
10-
markdown-link-check:
11-
runs-on: ubuntu-latest
12-
steps:
13-
- name: Checkout Repo
14-
uses: actions/checkout@v4
15-
- name: Install Python
16-
uses: actions/setup-python@v4
17-
with:
18-
python-version: '3.x'
19-
- name: Strip HTML comments from changed Markdown files
20-
if: github.event_name == 'pull_request'
21-
id: strip
22-
run: |
23-
echo "cleaned_files=" >> $GITHUB_OUTPUT
24-
for file in ${{ github.event.pull_request.changed_files }}; do
25-
[ "${file##*.}" != "md" ] && continue
26-
python3 - <<'EOF'
27-
import re, sys
28-
path = sys.argv[1]
29-
with open(path, 'r') as f:
30-
content = f.read()
31-
clean = re.sub(r'<!--.*?-->', '', content, flags=re.DOTALL)
32-
out = path + ".cleaned"
33-
with open(out, 'w') as f:
34-
f.write(clean)
35-
print(out)
36-
EOF
37-
echo "cleaned_files<<EOF" >> $GITHUB_OUTPUT
38-
echo "${file}.cleaned" >> $GITHUB_OUTPUT
39-
echo "EOF" >> $GITHUB_OUTPUT
40-
done
41-
- name: Run lychee on cleaned files
42-
if: github.event_name == 'pull_request' && steps.strip.outputs.cleaned_files != ''
43-
uses: lycheeverse/[email protected]
44-
with:
45-
args: >
46-
--verbose --exclude-mail --no-progress --exclude ^https?://
47-
${{ steps.strip.outputs.cleaned_files }}
48-
env:
49-
GITHUB_TOKEN: ${{ secrets.GITHUB_TOKEN }}
50-
- name: Run lychee on all Markdown (manual)
51-
if: github.event_name == 'workflow_dispatch'
52-
run: |
53-
python3 - <<'EOF'
54-
import re, glob
55-
for path in glob.glob('**/*.md', recursive=True):
56-
with open(path) as f: data = f.read()
57-
clean = re.sub(r'<!--.*?-->', '', data, flags=re.DOTALL)
58-
with open(path + ".cleaned", 'w') as f: f.write(clean)
59-
EOF
60-
echo "::set-output name=all_cleaned::$(echo **/*.md.cleaned)"
61-
env:
62-
GITHUB_TOKEN: ${{ secrets.GITHUB_TOKEN }}
63-
- name: Check all Markdown (manual)
64-
if: github.event_name == 'workflow_dispatch'
65-
uses: lycheeverse/[email protected]
66-
with:
67-
args: >
68-
--verbose --exclude-mail --no-progress --exclude ^https?://
69-
${{ steps.strip.outputs.all_cleaned }}
70-
env:
71-
GITHUB_TOKEN: ${{ secrets.GITHUB_TOKEN }}
10+
markdown-link-check:
  name: Check Markdown Broken Links
  runs-on: ubuntu-latest
  # Least privilege: the steps in this job only check out the repository
  # and read files from it, so the token needs read access to contents only.
  permissions:
    contents: read
13+
14+
steps:
15+
- uses: actions/checkout@v4
  name: Checkout Repo
  with:
    # 0 = full history, so the later `git diff origin/<base>...HEAD`
    # can locate the merge base of the pull request.
    fetch-depth: 0
19+
20+
- name: Get Changed Markdown Files (PR only)
  id: changed-files
  if: github.event_name == 'pull_request'
  env:
    # Hand the base branch to the script through the environment rather
    # than expanding an expression inside the script text, so the branch
    # name is always treated as data and never parsed as shell code.
    BASE_REF: ${{ github.base_ref }}
  run: |
    git fetch origin "$BASE_REF"
    # Three-dot diff: files changed on the PR branch since it diverged
    # from the base branch. `|| true` covers grep's exit status 1 when
    # no Markdown file was touched.
    files=$(git diff --name-only "origin/${BASE_REF}...HEAD" | grep '\.md$' || true)
    # Multi-line step output `md_files`: one path per line.
    {
      echo "md_files<<EOF"
      echo "$files"
      echo "EOF"
    } >> "$GITHUB_OUTPUT"
29+
- name: Run Initial Lychee Check (PR)
  if: github.event_name == 'pull_request' && steps.changed-files.outputs.md_files != ''
  # NOTE(review): the pinned tag was garbled in the source this was
  # recovered from ("[email protected]"); confirm the intended release.
  uses: lycheeverse/lychee-action@v1
  with:
    # First pass over the changed Markdown files as committed. All
    # http(s) links are excluded, so only local/relative links are checked.
    args: >
      --verbose --no-progress --exclude-mail --exclude ^https?://
      ${{ steps.changed-files.outputs.md_files }}
    # Use the action's own `output` input for the report path instead of
    # a raw `--output` flag inside `args`.
    output: lychee/initial.out
    # This pass only produces a report for the triage steps that follow;
    # it must not stop the job when broken links are found.
    fail: false
  env:
    GITHUB_TOKEN: ${{ secrets.GITHUB_TOKEN }}
39+
40+
- name: Extract Markdown Files with Broken Links
  id: affected-md
  if: github.event_name == 'pull_request'
  run: |
    mkdir -p sanitized_md
    report=lychee/initial.out
    affected=""
    # The report does not exist when the initial check was skipped
    # (no Markdown file changed), so only parse it when present.
    if [ -f "$report" ]; then
      # Keep report lines that mention a file:// URL containing ".md",
      # take the first space-separated field, drop the scheme, de-duplicate.
      # NOTE(review): this assumes the path is the first field of such
      # lines in the report format in use - confirm against a real report.
      affected=$(grep -E 'file://.*\.md' "$report" | cut -d' ' -f1 | sed 's|file://||' | sort -u || true)
    fi
    echo "$affected"
    # Multi-line step output `affected_files`: one path per line.
    {
      echo "affected_files<<EOF"
      echo "$affected"
      echo "EOF"
    } >> "$GITHUB_OUTPUT"
51+
- name: Strip HTML Comments from Affected Markdown Files Only
  if: steps.affected-md.outputs.affected_files != ''
  env:
    # The list is multi-line (one path per line). Expanding it directly
    # inside `for file in ...; do` puts a newline in the middle of the
    # `for` header, which is a shell syntax error as soon as two files
    # are listed, and lets file names be parsed as shell code.
    AFFECTED_FILES: ${{ steps.affected-md.outputs.affected_files }}
  run: |
    while IFS= read -r file; do
      # Skip blank lines in the list.
      [ -n "$file" ] || continue
      target="sanitized_md/$file"
      mkdir -p "$(dirname "$target")"
      # Slurp the whole file (-0777) so comments spanning several lines
      # are removed too; the original file is left untouched.
      perl -0777 -pe 's/<!--.*?-->//gs' "$file" > "$target"
    done <<< "$AFFECTED_FILES"
    # NOTE(review): only the Markdown files themselves are copied into
    # sanitized_md/, so relative links to files that were not copied may
    # not resolve from there - verify against the rerun results.
60+
- name: Rerun Lychee on Cleaned Files
  if: steps.affected-md.outputs.affected_files != ''
  # NOTE(review): the pinned tag was garbled in the source this was
  # recovered from ("[email protected]"); confirm the intended release.
  uses: lycheeverse/lychee-action@v1
  with:
    # Second pass over the comment-stripped copies. The glob is quoted so
    # it reaches lychee unexpanded; an unquoted `**` may be expanded by
    # the shell first, where it does not recurse without globstar.
    args: >
      --verbose --no-progress --exclude-mail --exclude ^https?://
      'sanitized_md/**/*.md'
  env:
    GITHUB_TOKEN: ${{ secrets.GITHUB_TOKEN }}
69+
70+
- name: Full Repo Scan (Manual)
  if: github.event_name == 'workflow_dispatch'
  run: |
    mkdir -p sanitized_md
    # Mirror the working tree into sanitized_md/. The destination lives
    # inside the source tree, so it is excluded (anchored with a leading
    # slash) to avoid copying sanitized_md/ into itself.
    rsync -av --exclude '.git' --exclude 'node_modules' --exclude '/sanitized_md' ./ sanitized_md/
    # Remove HTML comments, including multi-line ones (-0777 slurps the
    # whole file), in place in every mirrored Markdown file.
    find sanitized_md -name "*.md" -exec perl -0777 -i -pe 's/<!--.*?-->//gs' {} +
76+
- name: Run Lychee on Entire Repo (Manual)
  if: github.event_name == 'workflow_dispatch'
  # NOTE(review): the pinned tag was garbled in the source this was
  # recovered from ("[email protected]"); confirm the intended release.
  uses: lycheeverse/lychee-action@v1
  with:
    # Check every comment-stripped Markdown file in the mirrored tree.
    # The glob is quoted so it reaches lychee unexpanded; an unquoted
    # `**` may be expanded by the shell first, where it does not recurse
    # without globstar.
    args: >
      --verbose --no-progress --exclude-mail --exclude ^https?://
      'sanitized_md/**/*.md'
    # Report path, given as an action input (a sibling of `args`, not
    # part of the argument string).
    output: lychee/full.out
  env:
    GITHUB_TOKEN: ${{ secrets.GITHUB_TOKEN }}

0 commit comments

Comments
 (0)