Optimizations

yescallop · yescallop · commit 600d07a01024 · 2025-11-08T15:48:32.000+08:00
diff --git a/bench/benches/bench.rs b/bench/benches/bench.rs
@@ -10,11 +10,14 @@ use url::Url;
 
 criterion_group!(
     benches,
-    bench_parse,
-    bench_parse_iref,
-    bench_parse_iri_string,
-    bench_parse_oxiri,
-    bench_parse_url,
+    bench_parse_uri,
+    bench_parse_uri_iref,
+    bench_parse_uri_iri_string,
+    bench_parse_iri,
+    bench_parse_iri_iref,
+    bench_parse_iri_iri_string,
+    bench_parse_iri_oxiri,
+    bench_parse_iri_url,
     bench_build,
     bench_build_iri_string,
     bench_normalize,
@@ -24,36 +27,57 @@ criterion_group!(
 );
 criterion_main!(benches);
 
-const PARSE_CASE: &str = "https://user@example.com/search?q=%E6%B5%8B%E8%AF%95#fragment";
+const PARSE_URI_CASE: &str = "https://user@example.com/search?q=%E6%B5%8B%E8%AF%95#fragment";
+const PARSE_IRI_CASE: &str = "https://用户@测试.com/search?q=我们测试解析IRI#fragment";
 const NORMALIZE_CASE: &str = "eXAMPLE://a/./b/../b/%63/%7bfoo%7d";
 const RESOLVE_CASE_BASE: &str = "http://example.com/foo/bar/baz/quz";
 const RESOLVE_CASE_REF: &str = "../../../qux/./quux/../corge";
 
-fn bench_parse(c: &mut Criterion) {
-    c.bench_function("parse", |b| b.iter(|| Iri::parse(black_box(PARSE_CASE))));
+fn bench_parse_uri(c: &mut Criterion) {
+    c.bench_function("parse_uri", |b| {
+        b.iter(|| Uri::parse(black_box(PARSE_URI_CASE)))
+    });
+}
+
+fn bench_parse_uri_iref(c: &mut Criterion) {
+    c.bench_function("parse_uri_iref", |b| {
+        b.iter(|| iref::Uri::new(black_box(PARSE_URI_CASE)))
+    });
+}
+
+fn bench_parse_uri_iri_string(c: &mut Criterion) {
+    c.bench_function("parse_uri_iri_string", |b| {
+        b.iter(|| UriStr::new(black_box(PARSE_URI_CASE)))
+    });
+}
+
+fn bench_parse_iri(c: &mut Criterion) {
+    c.bench_function("parse_iri", |b| {
+        b.iter(|| Iri::parse(black_box(PARSE_IRI_CASE)))
+    });
 }
 
-fn bench_parse_iref(c: &mut Criterion) {
-    c.bench_function("parse_iref", |b| {
-        b.iter(|| iref::Iri::new(black_box(PARSE_CASE)))
+fn bench_parse_iri_iref(c: &mut Criterion) {
+    c.bench_function("parse_iri_iref", |b| {
+        b.iter(|| iref::Iri::new(black_box(PARSE_IRI_CASE)))
     });
 }
 
-fn bench_parse_iri_string(c: &mut Criterion) {
-    c.bench_function("parse_iri_string", |b| {
-        b.iter(|| IriStr::new(black_box(PARSE_CASE)))
+fn bench_parse_iri_iri_string(c: &mut Criterion) {
+    c.bench_function("parse_iri_iri_string", |b| {
+        b.iter(|| IriStr::new(black_box(PARSE_IRI_CASE)))
     });
 }
 
-fn bench_parse_oxiri(c: &mut Criterion) {
-    c.bench_function("parse_oxiri", |b| {
-        b.iter(|| oxiri::Iri::parse(black_box(PARSE_CASE)))
+fn bench_parse_iri_oxiri(c: &mut Criterion) {
+    c.bench_function("parse_iri_oxiri", |b| {
+        b.iter(|| oxiri::Iri::parse(black_box(PARSE_IRI_CASE)))
     });
 }
 
-fn bench_parse_url(c: &mut Criterion) {
-    c.bench_function("parse_url", |b| {
-        b.iter(|| Url::parse(black_box(PARSE_CASE)))
+fn bench_parse_iri_url(c: &mut Criterion) {
+    c.bench_function("parse_iri_url", |b| {
+        b.iter(|| Url::parse(black_box(PARSE_IRI_CASE)))
     });
 }
 
diff --git a/bench/result.txt b/bench/result.txt
@@ -1,13 +1,16 @@
 Environment: Intel Core i5-11300H, Rust 1.91.0 stable
 
-parse                   time:   [88.159 ns 88.572 ns 89.042 ns]
-parse_iref              time:   [141.80 ns 142.43 ns 143.17 ns]
-parse_iri_string        time:   [128.93 ns 129.82 ns 130.76 ns]
-parse_oxiri             time:   [118.65 ns 119.35 ns 120.21 ns]
-parse_url               time:   [311.78 ns 313.83 ns 316.09 ns]
-build                   time:   [197.67 ns 198.54 ns 199.76 ns]
-build_iri_string        time:   [371.53 ns 373.18 ns 375.07 ns]
-normalize               time:   [95.095 ns 96.186 ns 97.330 ns]
-normalize_iri_string    time:   [502.08 ns 504.09 ns 506.43 ns]
-resolve                 time:   [94.375 ns 94.987 ns 95.600 ns]
-resolve_iri_string      time:   [430.86 ns 432.78 ns 434.91 ns]
+parse_uri               time:   [51.437 ns 51.666 ns 51.958 ns]
+parse_uri_iref          time:   [107.81 ns 108.35 ns 108.90 ns]
+parse_uri_iri_string    time:   [138.03 ns 139.33 ns 140.78 ns]
+parse_iri               time:   [71.304 ns 71.657 ns 72.126 ns]
+parse_iri_iref          time:   [120.92 ns 121.48 ns 122.14 ns]
+parse_iri_iri_string    time:   [148.71 ns 150.10 ns 151.56 ns]
+parse_iri_oxiri         time:   [111.30 ns 112.06 ns 112.85 ns]
+parse_iri_url           time:   [601.13 ns 604.75 ns 608.75 ns]
+build                   time:   [197.04 ns 198.23 ns 199.63 ns]
+build_iri_string        time:   [378.62 ns 380.27 ns 382.36 ns]
+normalize               time:   [100.78 ns 101.90 ns 103.05 ns]
+normalize_iri_string    time:   [511.76 ns 513.77 ns 516.05 ns]
+resolve                 time:   [99.610 ns 100.19 ns 100.81 ns]
+resolve_iri_string      time:   [431.93 ns 433.37 ns 435.00 ns]
diff --git a/src/parse.rs b/src/parse.rs
@@ -1,6 +1,6 @@
 use crate::{
     imp::{AuthMeta, Constraints, HostMeta, Meta},
-    pct_enc::{table::*, Table},
+    pct_enc::{self, table::*, Table},
     utf8,
 };
 use core::{
@@ -165,33 +165,50 @@ impl<'a> Reader<'a> {
 
     fn read_with(&mut self, table: Table, mut f: impl FnMut(usize, u32)) -> Result<()> {
         let mut i = self.pos;
-        let allow_pct_encoded = table.allows_pct_encoded();
-        let allow_non_ascii = table.allows_non_ascii();
-
-        while i < self.len() {
-            let x = self.bytes[i];
-            if allow_pct_encoded && x == b'%' {
-                let [hi, lo, ..] = self.bytes[i + 1..] else {
-                    err!(i, InvalidPctEncodedOctet);
-                };
-                if !(hi.is_ascii_hexdigit() && lo.is_ascii_hexdigit()) {
-                    err!(i, InvalidPctEncodedOctet);
-                }
-                i += 3;
-            } else if allow_non_ascii {
-                let (x, len) = utf8::next_code_point(self.bytes, i);
-                if !table.allows_code_point(x) {
-                    break;
+
+        macro_rules! do_loop {
+            ($allow_pct_encoded:expr, $allow_non_ascii:expr) => {
+                while i < self.len() {
+                    let x = self.bytes[i];
+                    if $allow_pct_encoded && x == b'%' {
+                        let [hi, lo, ..] = self.bytes[i + 1..] else {
+                            err!(i, InvalidPctEncodedOctet);
+                        };
+                        if !pct_enc::is_valid_octet(hi, lo) {
+                            err!(i, InvalidPctEncodedOctet);
+                        }
+                        i += 3;
+                    } else if $allow_non_ascii {
+                        let (x, len) = utf8::next_code_point(self.bytes, i);
+                        if !table.allows_code_point(x) {
+                            break;
+                        }
+                        f(i, x);
+                        i += len;
+                    } else {
+                        if !table.allows_ascii(x) {
+                            break;
+                        }
+                        f(i, x as u32);
+                        i += 1;
+                    }
                 }
-                f(i, x);
-                i += len;
+            };
+        }
+
+        // This expansion alone doesn't help much, but combined with
+        // `#[inline(always)]` on `utf8::next_code_point`,
+        // it improves performance significantly for the non-ASCII case.
+        if table.allows_pct_encoded() {
+            if table.allows_non_ascii() {
+                do_loop!(true, true);
             } else {
-                if !table.allows_ascii(x) {
-                    break;
-                }
-                f(i, x as u32);
-                i += 1;
+                do_loop!(true, false);
             }
+        } else if table.allows_non_ascii() {
+            do_loop!(false, true);
+        } else {
+            do_loop!(false, false);
         }
 
         // INVARIANT: `i` is non-decreasing.
@@ -274,7 +291,7 @@ impl<'a> Reader<'a> {
         }
 
         let first = self.peek(0).unwrap();
-        let mut x = match (first as char).to_digit(16) {
+        let mut x = match pct_enc::decode_hexdigit(first) {
             Some(v) => v as u16,
             _ => {
                 return colon.then(|| {
@@ -296,7 +313,7 @@ impl<'a> Reader<'a> {
                 self.skip(i);
                 return None;
             };
-            match (b as char).to_digit(16) {
+            match pct_enc::decode_hexdigit(b) {
                 Some(v) => {
                     x = (x << 4) | v as u16;
                     i += 1;
diff --git a/src/pct_enc/mod.rs b/src/pct_enc/mod.rs
@@ -487,6 +487,14 @@ fn decode_octet(hi: u8, lo: u8) -> u8 {
     OCTET_TABLE_HI[hi as usize] | OCTET_TABLE_LO[lo as usize]
 }
 
+pub(crate) fn decode_hexdigit(x: u8) -> Option<u8> {
+    Some(OCTET_TABLE_LO[x as usize]).filter(|&v| v < 128)
+}
+
+pub(crate) const fn is_valid_octet(hi: u8, lo: u8) -> bool {
+    OCTET_TABLE_LO[hi as usize] | OCTET_TABLE_LO[lo as usize] < 128
+}
+
 /// An iterator used to decode an [`EStr`] slice.
 ///
 /// This struct is created by [`EStr::decode`]. Normally you'll use the methods below
diff --git a/src/pct_enc/table.rs b/src/pct_enc/table.rs
@@ -5,7 +5,7 @@
 //!
 //! [RFC 5234]: https://datatracker.ietf.org/doc/html/rfc5234
 
-use crate::utf8;
+use crate::{pct_enc, utf8};
 
 const MASK_PCT_ENCODED: u64 = 1;
 const MASK_UCSCHAR: u64 = 2;
@@ -143,34 +143,52 @@ impl Table {
     /// Validates the given string with the table.
     pub(crate) const fn validate(self, s: &[u8]) -> bool {
         let mut i = 0;
-        let allow_pct_encoded = self.allows_pct_encoded();
-        let allow_non_ascii = self.allows_non_ascii();
-
-        while i < s.len() {
-            let x = s[i];
-            if allow_pct_encoded && x == b'%' {
-                if i + 2 >= s.len() {
-                    return false;
-                }
-                let (hi, lo) = (s[i + 1], s[i + 2]);
 
-                if !(hi.is_ascii_hexdigit() && lo.is_ascii_hexdigit()) {
-                    return false;
-                }
-                i += 3;
-            } else if allow_non_ascii {
-                let (x, len) = utf8::next_code_point(s, i);
-                if !self.allows_code_point(x) {
-                    return false;
+        macro_rules! do_loop {
+            ($allow_pct_encoded:expr, $allow_non_ascii:expr) => {
+                while i < s.len() {
+                    let x = s[i];
+                    if $allow_pct_encoded && x == b'%' {
+                        if i + 2 >= s.len() {
+                            return false;
+                        }
+                        let (hi, lo) = (s[i + 1], s[i + 2]);
+
+                        if !pct_enc::is_valid_octet(hi, lo) {
+                            return false;
+                        }
+                        i += 3;
+                    } else if $allow_non_ascii {
+                        let (x, len) = utf8::next_code_point(s, i);
+                        if !self.allows_code_point(x) {
+                            return false;
+                        }
+                        i += len;
+                    } else {
+                        if !self.allows_ascii(x) {
+                            return false;
+                        }
+                        i += 1;
+                    }
                 }
-                i += len;
+            };
+        }
+
+        // This expansion alone doesn't help much, but combined with
+        // `#[inline(always)]` on `utf8::next_code_point`,
+        // it improves performance significantly for the non-ASCII case.
+        if self.allows_pct_encoded() {
+            if self.allows_non_ascii() {
+                do_loop!(true, true);
             } else {
-                if !self.allows_ascii(x) {
-                    return false;
-                }
-                i += 1;
+                do_loop!(true, false);
             }
+        } else if self.allows_non_ascii() {
+            do_loop!(false, true);
+        } else {
+            do_loop!(false, false);
         }
+
         true
     }
 }
diff --git a/src/utf8.rs b/src/utf8.rs
@@ -10,7 +10,8 @@ const fn utf8_acc_cont_byte(ch: u32, byte: u8) -> u32 {
     (ch << 6) | (byte & CONT_MASK) as u32
 }
 
-#[inline]
+// Make sure it's inlined into `Reader::read_with`.
+#[inline(always)]
 pub const fn next_code_point(bytes: &[u8], i: usize) -> (u32, usize) {
     let x = bytes[i];
     if x < 128 {

Original file line number	Diff line number	Diff line change
`@@ -10,7 +10,8 @@ const fn utf8_acc_cont_byte(ch: u32, byte: u8) -> u32 {`
`10`	`10`	`(ch << 6) | (byte & CONT_MASK) as u32`
`11`	`11`	`}`
`12`	`12`
`13`		`-#[inline]`
	`13`	``+// Make sure it's inlined into `Reader::read_with`.``
	`14`	`+#[inline(always)]`
`14`	`15`	`pub const fn next_code_point(bytes: &[u8], i: usize) -> (u32, usize) {`
`15`	`16`	`let x = bytes[i];`
`16`	`17`	`if x < 128 {`