Skip to content

Commit b79575a

Browse files
authored
Merge branch 'master' into patch-2
2 parents da73784 + 5655db7 commit b79575a

File tree

8 files changed

+139
-171
lines changed

8 files changed

+139
-171
lines changed

.github/workflows/check-links.yml

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -23,5 +23,5 @@ jobs:
2323
id: lychee
2424
uses: lycheeverse/lychee-action@v2
2525
with:
26-
args: --base-url dist --exclude-all-private dist
26+
args: '--root-dir ${{ github.workspace }}/dist --exclude-all-private dist'
2727
fail: false

.github/workflows/check-pr-links.yml

Lines changed: 12 additions & 53 deletions
Original file line numberDiff line numberDiff line change
@@ -9,64 +9,23 @@ jobs:
99
linkChecker:
1010
runs-on: ubuntu-latest
1111
steps:
12-
- name: Clone repository
13-
uses: actions/checkout@v5
14-
with:
15-
fetch-depth: 0
16-
17-
- name: Setup Node.js
18-
uses: actions/setup-node@v6
19-
with:
20-
node-version: "20"
21-
22-
- name: Setup pnpm
23-
uses: pnpm/action-setup@v4
24-
with:
25-
version: latest
26-
27-
- name: Check out master branch
28-
run: git checkout master
29-
30-
- name: Install dependencies for master
31-
run: pnpm install --frozen-lockfile
32-
33-
- name: Build site from master
34-
run: pnpm build
12+
- uses: actions/checkout@v5
3513

36-
- name: Dump all links from master
37-
id: dump_links_from_master
38-
uses: lycheeverse/lychee-action@v2
14+
- name: Build site
15+
uses: withastro/action@v5
3916
with:
40-
args: --dump --base-url dist --exclude-all-private dist
41-
output: ./links-master.txt
42-
43-
- name: Stash untracked files
44-
run: git stash push --include-untracked
45-
46-
- name: Check out feature branch
47-
run: |
48-
if [ "${{ github.event_name }}" = "pull_request" ]; then
49-
git checkout ${{ github.head_ref }}
50-
else
51-
git checkout ${{ github.ref_name }}
52-
fi
53-
54-
- name: Apply stashed changes
55-
run: git stash pop || true
56-
57-
- name: Install dependencies for feature branch
58-
run: pnpm install --frozen-lockfile
59-
60-
- name: Build site from feature branch
61-
run: pnpm build
62-
63-
- name: Append links-master.txt to .lycheeignore
64-
run: cat links-master.txt >> .lycheeignore
17+
package-manager: pnpm@latest
6518

66-
- name: Check links in PR changes
19+
- name: Check links
6720
uses: lycheeverse/lychee-action@v2
6821
with:
69-
args: --base-url dist --exclude-all-private dist
22+
# Remap live URLs to build directory because the links are potentially not live (not yet on master)
23+
args: |
24+
--root-dir $PWD/dist
25+
--exclude-all-private
26+
--remap 'https://lychee\.cli\.rs/(.*)/ file://'$PWD'/dist/$1/index.html'
27+
dist/
28+
src/
7029
fail: true
7130

7231
- name: Suggestions

.lycheeignore

Lines changed: 13 additions & 4 deletions
Original file line numberDiff line numberDiff line change
@@ -1,8 +1,17 @@
1-
https://api.reacher.email/v0/check_email
21
file:///home/user/website/
32
^https://www/$
43
^https://web/$
5-
# 404 page returns a 404, d'oh
6-
https://lychee.cli.rs/404/
7-
# Errors with "Too Many Requests"
4+
5+
# URL is used with POST
6+
https://api.reacher.email/v0/check_email
7+
8+
# 404 page is directly in dist/404.html but we've remapped it to an invalid path
9+
dist/404/index.html$
10+
11+
# Code examples in base-url.mdx which don't exist
12+
/docs/about.php$
13+
/docs/recipes/guide.php$
14+
15+
# Websites with aggressive rate limiting / bot detection
816
https://www.nongnu.org/atool
17+
https://builtwith.com/

astro.config.mjs

Lines changed: 32 additions & 88 deletions
Original file line numberDiff line numberDiff line change
@@ -37,114 +37,58 @@ export default defineConfig({
3737
{
3838
label: "Guides",
3939
items: [
40-
{ label: "Getting Started", link: "/guides/getting-started" },
41-
{ label: "Library Usage", link: "/guides/library" },
42-
{ label: "Configure lychee", link: "/guides/config" },
43-
{ label: "CLI", link: "/guides/cli" },
44-
{ label: "Output Modes", link: "/guides/output" },
40+
"guides/getting-started",
41+
"guides/library",
42+
"guides/config",
43+
"guides/cli",
44+
"guides/output",
45+
"guides/preprocessing",
4546
],
4647
},
4748
{
4849
label: "Recipes",
4950
items: [
50-
{ label: "Anchor Links", link: "/recipes/anchors" },
51-
{ label: "Caching", link: "/recipes/caching" },
52-
{ label: "Excluding Links", link: "/recipes/excluding-links" },
53-
{ label: "Excluding Paths", link: "/recipes/excluding-paths" },
54-
{
55-
label: "Remapping One URL to Another",
56-
link: "/recipes/migration",
57-
},
58-
{
59-
label: "Testing Sites Not Served from Root with --base-url",
60-
link: "/recipes/base-url",
61-
},
62-
{
63-
label: "Local File Checking with --root-dir",
64-
link: "/recipes/root-dir",
65-
},
66-
{
67-
label: "Pretty URLs (Fallback Extensions and Index Files)",
68-
link: "/recipes/pretty-urls",
69-
},
70-
{ label: "Wikilinks", link: "/recipes/wikilinks" },
51+
"recipes/anchors",
52+
"recipes/caching",
53+
"recipes/excluding-links",
54+
"recipes/excluding-paths",
55+
"recipes/migration",
56+
"recipes/base-url",
57+
"recipes/root-dir",
58+
"recipes/pretty-urls",
59+
"recipes/wikilinks",
7160
],
7261
},
7362
{
7463
label: "GitHub Action Recipes",
7564
items: [
76-
{
77-
label: "Check Links in Repository",
78-
link: "/github_action_recipes/check-repository",
79-
},
80-
{
81-
label: "Check Links in Pull Requests",
82-
link: "/github_action_recipes/pull-requests",
83-
},
84-
{
85-
label: "Replace with Archived Links",
86-
link: "/github_action_recipes/archived-links",
87-
},
88-
{
89-
label: "Add Pull Request Comment",
90-
link: "/github_action_recipes/add-pr-comment",
91-
},
92-
{
93-
label: "Caching Requests",
94-
link: "/github_action_recipes/caching",
95-
},
65+
"github_action_recipes/check-repository",
66+
"github_action_recipes/pull-requests",
67+
"github_action_recipes/archived-links",
68+
"github_action_recipes/add-pr-comment",
69+
"github_action_recipes/caching",
9670
],
9771
},
9872
{
9973
label: "Troubleshooting",
10074
items: [
101-
{ label: "Rate Limits", link: "/troubleshooting/rate-limits" },
102-
{
103-
label: "Custom Headers",
104-
link: "/troubleshooting/custom-headers",
105-
},
106-
{ label: "Mail Addresses", link: "/troubleshooting/mail" },
107-
{
108-
label: "Network Errors",
109-
link: "/troubleshooting/network-errors",
110-
},
111-
{
112-
label: "Too Many Open Files",
113-
link: "/troubleshooting/open-files",
114-
},
115-
{
116-
label: "Too Many Redirects",
117-
link: "/troubleshooting/redirects",
118-
},
119-
{
120-
label: "Special Status Codes",
121-
link: "/troubleshooting/status-codes",
122-
},
75+
"troubleshooting/rate-limits",
76+
"troubleshooting/custom-headers",
77+
"troubleshooting/mail",
78+
"troubleshooting/network-errors",
79+
"troubleshooting/open-files",
80+
"troubleshooting/redirects",
81+
"troubleshooting/status-codes",
12382
],
12483
},
12584
{
12685
label: "Internals",
12786
items: [
128-
{
129-
label: "How lychee Works",
130-
link: "/internals/how-it-works",
131-
},
132-
{
133-
label: "Contributing",
134-
link: "/internals/contributing",
135-
},
136-
{
137-
label: "Sponsors",
138-
link: "/internals/sponsors",
139-
},
140-
{
141-
label: "Credits",
142-
link: "/internals/credits",
143-
},
144-
{
145-
label: "Users",
146-
link: "/internals/users",
147-
},
87+
"internals/how-it-works",
88+
"internals/contributing",
89+
"internals/sponsors",
90+
"internals/credits",
91+
"internals/users",
14892
],
14993
},
15094
],

package.json

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -14,7 +14,7 @@
1414
"lint:fix": "biome lint --write .",
1515
"check-format": "biome check .",
1616
"check-format:fix": "biome check --write .",
17-
"precommit": "npm run check-format && npm run lint"
17+
"precommit": "pnpm run check-format && pnpm run lint"
1818
},
1919
"dependencies": {
2020
"@astrojs/check": "^0.9.5",

src/content/docs/guides/getting-started.mdx

Lines changed: 5 additions & 18 deletions
Original file line numberDiff line numberDiff line change
@@ -23,7 +23,7 @@ You can install lychee using various package managers.
2323
<Code code="docker pull lycheeverse/lychee" lang="sh" />
2424
</TabItem>
2525
<TabItem label="NixOS">
26-
<Code code="nix-env -iA nixos.lychee" lang="sh" />
26+
<Code code="nix-shell -p lychee" lang="sh" />
2727
</TabItem>
2828
<TabItem label="FreeBSD">
2929
<Code code="pkg install lychee" lang="sh" />
@@ -206,24 +206,11 @@ In this command, we ignore the case when globbing, so it matches
206206
- `~/projects/rust_game_/README`
207207
- `~/projects/python_script_/Readme.markdown`
208208

209-
### Check Links From Epub File
209+
### Check other file formats
210210

211-
If you have [atool](https://www.nongnu.org/atool) installed, you can check links inside `.epub` files as well!
212-
213-
```bash
214-
acat -F zip {file.epub} "_.xhtml" "_.html" | lychee -
215-
```
216-
217-
:::caution[Attention]
218-
lychee parses other file formats as plaintext and extracts links using [linkify](https://github.com/robinst/linkify).
219-
This generally works well if there are no format- or encoding
220-
specifics, but in case you need dedicated support for a new file format, please
221-
consider [creating an issue](https://github.com/lycheeverse/lychee/issues).
222-
:::
223-
224-
[atool]: https://www.nongnu.org/atool
225-
[linkify]: https://github.com/robinst/linkify
226-
[issue]: https://github.com/lycheeverse/lychee/issues
211+
By preprocessing files, it is possible to check links in
212+
files which aren't officially supported by lychee.
213+
See [file preprocessing](/guides/preprocessing).
227214

228215
## GitHub Action
229216

Lines changed: 69 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,69 @@
1+
---
2+
title: File preprocessing
3+
---
4+
5+
Out of the box, lychee supports HTML, Markdown, and plain text formats.
6+
More precisely, HTML files are parsed as HTML5 with the use of the [html5ever] parser.
7+
Markdown files are treated as [CommonMark] with the use of [pulldown-cmark].
8+
9+
For any other file format lychee falls back to a "plain text" mode.
10+
This means that [linkify] attempts to extract URLs on a best-effort basis.
11+
If invalid UTF-8 characters are encountered, the input file is skipped,
12+
because it is assumed that the file is in a binary format lychee cannot understand.
13+
14+
lychee allows file preprocessing with the `--preprocess` flag.
15+
For each input file the command specified with `--preprocess` is invoked instead of reading the input file directly.
16+
The following examples show how to preprocess common file formats.
17+
In most cases it's necessary to create a helper script for preprocessing,
18+
as no parameters can be supplied from the CLI directly.
19+
20+
```bash
21+
lychee files/* --preprocess ./preprocess.sh
22+
```
23+
24+
The referenced `preprocess.sh` script could look like this:
25+
26+
```bash
27+
#!/usr/bin/env bash
28+
29+
case "$1" in
30+
*.pdf)
31+
exec pdftohtml -i -s -stdout "$1"
32+
# Alternatives:
33+
# exec pdftotext "$1" -
34+
# exec pdftk "$1" output - uncompress | grep -aPo '/URI *\(\K[^)]*'
35+
;;
36+
*.odt|*.docx|*.epub|*.ipynb)
37+
exec pandoc "$1" --to=html --wrap=none --markdown-headings=atx
38+
;;
39+
*.odp|*.pptx|*.ods|*.xlsx)
40+
# libreoffice can't print to stdout unfortunately
41+
libreoffice --headless --convert-to html "$1" --outdir /tmp
42+
file=$(basename "$1")
43+
file="/tmp/${file%.*}.html"
44+
sed '/<body/,$!d' "$file" # discard content before body which contains libreoffice URLs
45+
rm "$file"
46+
;;
47+
*.adoc|*.asciidoc)
48+
asciidoctor -a stylesheet! "$1" -o -
49+
;;
50+
*.csv)
51+
# specify --delimiter if values not delimited by ","
52+
exec csvtk csv2json "$1"
53+
;;
54+
*)
55+
# identity function, output input without changes
56+
exec cat
57+
;;
58+
esac
59+
```
60+
61+
For more examples and information take a look at [lychee-all],
62+
a repository dedicated to collecting use cases for file preprocessing.
63+
Feel free to open up an issue if you are missing a specific file format or have questions.
64+
65+
[linkify]: https://github.com/robinst/linkify
66+
[html5ever]: https://github.com/servo/html5ever
67+
[CommonMark]: https://commonmark.org/
68+
[pulldown-cmark]: https://github.com/pulldown-cmark/pulldown-cmark/
69+
[lychee-all]: https://github.com/lycheeverse/lychee-all

src/content/docs/recipes/base-url.mdx

Lines changed: 6 additions & 6 deletions
Original file line numberDiff line numberDiff line change
@@ -66,15 +66,15 @@ Here's what happens to different types of links:
6666

6767
<Code
6868
code={`<!-- Original links -->
69-
<a href="./guide.html">Guide</a>
70-
<a href="../about.html">About</a>
71-
<a href="https://other.com">External</a>
69+
<a href="./guide.php">Guide</a>
70+
<a href="../about.php">About</a>
71+
<a href="https://example.com">Absolute</a>
7272
7373
<!-- After --base-url https://example.com/docs/ -->
7474
75-
<a href="https://example.com/docs/guide.html">Guide</a>
76-
<a href="https://example.com/about.html">About</a>
77-
<a href="https://other.com">External</a>`} lang={fileLang}
75+
<a href="https://example.com/docs/guide.php">Guide</a>
76+
<a href="https://example.com/about.php">About</a>
77+
<a href="https://example.com">Absolute</a>`} lang={fileLang}
7878
title="Link Resolution Example" />
7979

8080
## Common Use Cases

0 commit comments

Comments
 (0)