Skip to content

Commit 4931ae5

Browse files
committed
Add basic netlify redirects support
1 parent 519c669 commit 4931ae5

File tree

4 files changed

+172
-19
lines changed

4 files changed

+172
-19
lines changed

README.md

Lines changed: 30 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -184,6 +184,36 @@ and `--github-actions` feature.
184184
fairly feature-rich, but was a non-starter due to performance. This applies
185185
to countless other link checkers we tried that are not mentioned here.
186186

187+
## Redirects
188+
189+
Since 0.1.45 `hyperlink` supports a very small subset of Netlify's `_redirects`
190+
file. Redirect sources will be considered when validating `href`s, and redirect
191+
targets will be checked for validity as well.
192+
193+
At the root of your site, create a file named `_redirects`:
194+
195+
```
196+
# lines starting with # are ignored
197+
/old-url.html /new-url.html
198+
199+
# on the next line, trailing data like the 301 status code is ignored
200+
/old-url2.html /new-url2.html 301
201+
202+
# /old-url.html will become a valid link target
203+
# hyperlink will validate that /new-url.html exists.
204+
```
205+
206+
The major things missing from the implementation are:
207+
208+
* `hyperlink` completely ignores any status codes or country code conditions.
209+
The only thing it parses is the `from to` pair; the rest of the line is ignored.
210+
211+
* "Splat sources" (`/articles/*`) and "splat targets" (`/posts/:splat`) are not
212+
supported.
213+
214+
* Generally speaking, `hyperlink` does not support "pretty URLs", i.e. one
215+
cannot request `/mypage` and expect `mypage.html` to be loaded.
216+
187217
## Testimonials
188218

189219
> We use Hyperlink to check for dead links on

src/html/mod.rs

Lines changed: 92 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -3,7 +3,7 @@ mod parser;
33
use std::borrow::Cow;
44
use std::fmt;
55
use std::fs;
6-
use std::io::Read;
6+
use std::io::{BufRead, BufReader, Read};
77
use std::path::{Path, PathBuf};
88
use std::str;
99
use std::sync::Arc;
@@ -308,6 +308,41 @@ impl Document {
308308
Href(href.into_bump_str())
309309
}
310310

311+
/// Extracts the links of this document and hands each one to `callback`.
///
/// Two kinds of files are handled:
///
/// * a file whose name is exactly `_redirects` is parsed with
///   `parse_redirects`;
/// * a file whose extension is `html` or `htm` is parsed with
///   `links_from_html`.
///
/// Returns `Ok(true)` when the file was parsed as one of the kinds above and
/// `Ok(false)` when it was neither (in which case `callback` is never
/// invoked). Errors returned by the underlying parser are propagated.
pub fn extract_links<'b, 'l, P: ParagraphWalker, F>(
312+
&self,
313+
doc_buf: &'b mut DocumentBuffers,
314+
check_anchors: bool,
315+
mut callback: F,
316+
) -> Result<bool, Error>
317+
where
318+
'b: 'l,
319+
F: FnMut(Link<'l, P::Paragraph>),
320+
{
321+
// A redirects file is recognized purely by its file name; the directory it
// lives in is not inspected here.
if self.path.file_name().and_then(|f| f.to_str()) == Some("_redirects") {
322+
for link in self.parse_redirects::<P>(doc_buf, check_anchors)? {
323+
callback(link);
324+
}
325+
return Ok(true);
326+
}
327+
328+
// Otherwise only `html`/`htm` files are parsed. A missing extension, or one
// that is not valid UTF-8, counts as "not HTML".
if self
329+
.path
330+
.extension()
331+
.and_then(|extension| {
332+
let ext = extension.to_str()?;
333+
Some(ext == "html" || ext == "htm")
334+
})
335+
.unwrap_or(false)
336+
{
337+
for link in self.links_from_html::<P>(doc_buf, check_anchors)? {
338+
callback(link);
339+
}
340+
return Ok(true);
341+
}
342+
343+
// Neither a redirects file nor an HTML file: nothing was parsed.
Ok(false)
344+
}
345+
311346
pub fn links<'b, 'l, P: ParagraphWalker>(
312347
&self,
313348
doc_buf: &'b mut DocumentBuffers,
@@ -319,6 +354,62 @@ impl Document {
319354
self.links_from_read::<_, P>(doc_buf, fs::File::open(&*self.path)?, check_anchors)
320355
}
321356

357+
/// Opens the file at `self.path` and forwards it, together with `doc_buf` and
/// `check_anchors`, to `links_from_read`, returning the iterator of links that
/// call produces.
///
/// A failure to open the file is propagated through `?`.
///
/// NOTE(review): the body is the same call that `links` makes — confirm
/// whether both entry points are still needed.
fn links_from_html<'b, 'l, P: ParagraphWalker>(
358+
&self,
359+
doc_buf: &'b mut DocumentBuffers,
360+
check_anchors: bool,
361+
) -> Result<impl Iterator<Item = Link<'l, P::Paragraph>>, Error>
362+
where
363+
'b: 'l,
364+
{
365+
self.links_from_read::<_, P>(doc_buf, fs::File::open(&*self.path)?, check_anchors)
366+
}
367+
368+
/// Parses the file at `self.path` as a Netlify-style `_redirects` file and
/// returns the links it yields.
///
/// The file is read line by line:
///
/// * blank lines and lines whose first non-whitespace character is `#` are
///   skipped;
/// * every other line is split on whitespace. Lines with fewer than two
///   fields are ignored, and any field after the second is not used.
///
/// For each accepted line the first field (the redirect source) produces a
/// `Link::Defines`, and the second field (the redirect target) produces a
/// `Link::Uses` unless `is_external_link` reports the target as external.
///
/// Errors from opening the file or reading a line are propagated.
fn parse_redirects<'b, 'l, P: ParagraphWalker>(
369+
&self,
370+
doc_buf: &'b mut DocumentBuffers,
371+
check_anchors: bool,
372+
) -> Result<impl Iterator<Item = Link<'l, P::Paragraph>>, Error>
373+
where
374+
'b: 'l,
375+
{
376+
// Links are collected eagerly into an arena-backed vector and returned as an
// iterator once the whole file has been read.
let mut link_buf = BumpVec::new_in(&doc_buf.arena);
377+
let file = fs::File::open(&*self.path)?;
378+
let reader = BufReader::new(file);
379+
380+
for line in reader.lines() {
381+
let line = line?;
382+
383+
let trimmed = line.trim();
384+
// Skip blank lines and `#` comments.
if trimmed.is_empty() || trimmed.starts_with('#') {
385+
continue;
386+
}
387+
388+
// Only the first two whitespace-separated fields are used; anything after
// them on the line is never looked at.
let parts: Vec<&str> = trimmed.split_whitespace().collect();
389+
if parts.len() >= 2 {
390+
let source = parts[0];
391+
let target = parts[1];
392+
393+
// Copy both fields into the arena — presumably so the resulting hrefs do not
// borrow from `line`, which is dropped at the end of this iteration.
let source_str = doc_buf.arena.alloc_str(source);
394+
let target_str = doc_buf.arena.alloc_str(target);
395+
396+
// The redirect source is recorded as a defined link.
link_buf.push(Link::Defines(DefinedLink {
397+
href: self.join(&doc_buf.arena, check_anchors, source_str),
398+
}));
399+
400+
// The redirect target is recorded as a used link, unless it is external.
if !is_external_link(target.as_bytes()) {
401+
link_buf.push(Link::Uses(UsedLink {
402+
href: self.join(&doc_buf.arena, check_anchors, target_str),
403+
path: self.path.clone(),
404+
// No paragraph is attached to links that come from a redirects file.
paragraph: None,
405+
}));
406+
}
407+
}
408+
}
409+
410+
Ok(link_buf.into_iter())
411+
}
412+
322413
fn links_from_read<'b, 'l, R: Read, P: ParagraphWalker>(
323414
&self,
324415
doc_buf: &'b mut DocumentBuffers,

src/main.rs

Lines changed: 9 additions & 18 deletions
Original file line numberDiff line numberDiff line change
@@ -468,26 +468,17 @@ fn extract_html_links<C: LinkCollector<P::Paragraph>, P: ParagraphWalker>(
468468
}));
469469
file_count += 1;
470470

471-
if !document
472-
.path
473-
.extension()
474-
.and_then(|extension| Some(HTML_FILES.contains(&extension.to_str()?)))
475-
.unwrap_or(false)
476-
{
477-
return Ok((doc_buf, collector, documents_count, file_count));
471+
let was_parsed = document
472+
.extract_links::<P, _>(&mut doc_buf, check_anchors, |link| {
473+
collector.ingest(link);
474+
})
475+
.with_context(|| format!("Failed to read file {}", document.path.display()))?;
476+
477+
if was_parsed {
478+
doc_buf.reset();
479+
documents_count += 1;
478480
}
479481

480-
for link in document
481-
.links::<P>(&mut doc_buf, check_anchors)
482-
.with_context(|| format!("Failed to read file {}", document.path.display()))?
483-
{
484-
collector.ingest(link);
485-
}
486-
487-
doc_buf.reset();
488-
489-
documents_count += 1;
490-
491482
Ok((doc_buf, collector, documents_count, file_count))
492483
},
493484
)

tests/cli.rs

Lines changed: 41 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -65,3 +65,44 @@ fn test_bad_dir() {
6565
"Error: IO error for operation on non_existing_dir:",
6666
));
6767
}
68+
69+
// End-to-end test of `_redirects` handling.
//
// Builds a temporary site with a `_redirects` file, two of the redirect
// targets, and an `index.html` that links to a redirect source, then runs the
// `hyperlink` binary on it and expects exactly one bad link to be reported.
#[test]
70+
fn test_redirects() {
71+
let site = assert_fs::TempDir::new().unwrap();
72+
73+
// The redirects file contains a comment line, a blank line, a rule with a
// trailing status code, a rule with an external target, a rule whose target
// is never created, and a final rule without a trailing newline.
site.child("_redirects")
74+
.write_str(
75+
"# This is a comment\n\
76+
\n\
77+
/old-page /new-page.html 301\n\
78+
/external https://example.com/page\n\
79+
/broken /missing-page.html\n\
80+
/another /target.html",
81+
)
82+
.unwrap();
83+
84+
// Only two of the three local redirect targets exist on disk;
// `missing-page.html` is deliberately not created.
site.child("new-page.html").touch().unwrap();
85+
site.child("target.html").touch().unwrap();
86+
87+
// `/old-page` is not a file in the site; it appears only as a redirect source.
site.child("index.html")
88+
.write_str("<a href='/old-page'>link</a>")
89+
.unwrap();
90+
91+
let mut cmd = Command::cargo_bin("hyperlink").unwrap();
92+
cmd.current_dir(site.path()).arg(".");
93+
94+
// Expect exit code 1 and output that reports a single bad link,
// `/missing-page.html`, listed under the `_redirects` file.
cmd.assert().failure().code(1).stdout(
95+
predicate::str::is_match(
96+
r#"^Reading files
97+
Checking 4 links from 4 files \(4 documents\)
98+
\./.*_redirects
99+
error: bad link /missing-page\.html
100+
101+
Found 1 bad links
102+
"#,
103+
)
104+
.unwrap(),
105+
);
106+
107+
site.close().unwrap();
108+
}

0 commit comments

Comments
 (0)