From 8b58777968a4c663d6c1293bacff6da99e6e5203 Mon Sep 17 00:00:00 2001 From: nixxo Date: Thu, 4 Sep 2025 09:33:11 +0200 Subject: [PATCH] fix partial urlencoded link support - added full urlencoding to properly check urlencoded anchor links against non-urlencoded heading IDs - added tests. urlencoding provided by https://crates.io/crates/urlencoding --- Cargo.lock | 7 +++++++ src/tools/linkchecker/Cargo.toml | 1 + src/tools/linkchecker/main.rs | 13 +------------ src/tools/linkchecker/tests/valid/inner/bar.html | 3 +++ src/tools/linkchecker/tests/valid/inner/foo.html | 8 ++++++++ .../linkchecker/tests/valid/inner/redir-target.html | 3 +++ 6 files changed, 23 insertions(+), 12 deletions(-) diff --git a/Cargo.lock b/Cargo.lock index 52f5048115753..fd4b661c40de7 100644 --- a/Cargo.lock +++ b/Cargo.lock @@ -2167,6 +2167,7 @@ version = "0.1.0" dependencies = [ "html5ever", "regex", + "urlencoding", ] [[package]] @@ -5825,6 +5826,12 @@ dependencies = [ "percent-encoding", ] +[[package]] +name = "urlencoding" +version = "2.1.3" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "daf8dba3b7eb870caf1ddeed7bc9d2a049f3cfdfae7cb521b087cc33ae4c49da" + [[package]] name = "utf-8" version = "0.7.6" diff --git a/src/tools/linkchecker/Cargo.toml b/src/tools/linkchecker/Cargo.toml index fb5bff3fe63ff..f0886e31b243f 100644 --- a/src/tools/linkchecker/Cargo.toml +++ b/src/tools/linkchecker/Cargo.toml @@ -10,3 +10,4 @@ path = "main.rs" [dependencies] regex = "1" html5ever = "0.29.0" +urlencoding = "2.1.3" diff --git a/src/tools/linkchecker/main.rs b/src/tools/linkchecker/main.rs index 1dc45728c90cd..e07a0784cdb3a 100644 --- a/src/tools/linkchecker/main.rs +++ b/src/tools/linkchecker/main.rs @@ -232,18 +232,7 @@ enum FileEntry { type Cache = HashMap; fn small_url_encode(s: &str) -> String { - s.replace('<', "%3C") - .replace('>', "%3E") - .replace(' ', "%20") - .replace('?', "%3F") - .replace('\'', "%27") - .replace('&', "%26") - .replace(',', "%2C") - 
.replace(':', "%3A") - .replace(';', "%3B") - .replace('[', "%5B") - .replace(']', "%5D") - .replace('\"', "%22") + urlencoding::encode(s).to_string() } impl Checker { diff --git a/src/tools/linkchecker/tests/valid/inner/bar.html b/src/tools/linkchecker/tests/valid/inner/bar.html index 4b500d78b76e4..6ffda259c40eb 100644 --- a/src/tools/linkchecker/tests/valid/inner/bar.html +++ b/src/tools/linkchecker/tests/valid/inner/bar.html @@ -3,5 +3,8 @@

Bar

+ +

Bar

+ diff --git a/src/tools/linkchecker/tests/valid/inner/foo.html b/src/tools/linkchecker/tests/valid/inner/foo.html index 3c6a7483bcd46..f30bf71820519 100644 --- a/src/tools/linkchecker/tests/valid/inner/foo.html +++ b/src/tools/linkchecker/tests/valid/inner/foo.html @@ -8,7 +8,15 @@ external links not validated Redirect + + + + +

Local

+ +

Local

+ diff --git a/src/tools/linkchecker/tests/valid/inner/redir-target.html b/src/tools/linkchecker/tests/valid/inner/redir-target.html index bd59884a01ecf..ac1dec6d5b4aa 100644 --- a/src/tools/linkchecker/tests/valid/inner/redir-target.html +++ b/src/tools/linkchecker/tests/valid/inner/redir-target.html @@ -1,5 +1,8 @@

Redir

+ + +

Redir