Skip to content

Commit 5da034a

Browse files
committed
Optimize remove_dot_segments
30% faster
1 parent e77b670 commit 5da034a

File tree

3 files changed

+58
-35
lines changed

bench/benches/bench.rs

Lines changed: 2 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -26,8 +26,8 @@ criterion_main!(benches);
2626

2727
const PARSE_CASE: &str = "https://user@example.com/search?q=%E6%B5%8B%E8%AF%95#fragment";
2828
const NORMALIZE_CASE: &str = "eXAMPLE://a/./b/../b/%63/%7bfoo%7d";
29-
const RESOLVE_CASE_BASE: &str = "http://example.com/foo/bar";
30-
const RESOLVE_CASE_REF: &str = "../baz";
29+
const RESOLVE_CASE_BASE: &str = "http://example.com/foo/bar/baz/quz";
30+
const RESOLVE_CASE_REF: &str = "../../../qux/./quux/../corge";
3131

3232
fn bench_parse(c: &mut Criterion) {
3333
c.bench_function("parse", |b| b.iter(|| Iri::parse(black_box(PARSE_CASE))));

src/normalize.rs

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -224,7 +224,7 @@ pub(crate) fn normalize(
224224
let mut path_buf = String::with_capacity(path.len());
225225
normalize_estr(&mut path_buf, path, false, ascii_only);
226226

227-
let underflow_occurred = resolve::remove_dot_segments(&mut buf, &[&path_buf]);
227+
let underflow_occurred = resolve::remove_dot_segments(&mut buf, &path_buf, None);
228228
if underflow_occurred && !allow_path_underflow {
229229
return Err(NormalizeError::PathUnderflow);
230230
}

src/resolve.rs

Lines changed: 55 additions & 32 deletions
Original file line numberDiff line numberDiff line change
@@ -3,7 +3,7 @@
33
use crate::imp::{Meta, Ri, RiMaybeRef, RmrRef};
44
use alloc::string::String;
55
use borrow_or_share::Bos;
6-
use core::{fmt, num::NonZeroUsize};
6+
use core::{fmt, iter, num::NonZeroUsize};
77

88
/// An error occurred when resolving a URI/IRI reference.
99
#[derive(Clone, Copy, Debug, Eq, PartialEq)]
@@ -174,8 +174,8 @@ pub(crate) fn resolve(
174174

175175
// Make sure that swapping the order of resolution and normalization
176176
// does not change the result.
177-
let last_slash_idx = base_path.rfind('/').unwrap();
178-
let last_seg = &base_path[last_slash_idx + 1..];
177+
let last_slash_idx = base_path.bytes().rposition(|b| b == b'/').unwrap();
178+
let last_seg = &base_path.as_bytes()[last_slash_idx + 1..];
179179
let base_path_stripped = match classify_segment(last_seg) {
180180
SegKind::DoubleDot => base_path,
181181
_ => &base_path[..=last_slash_idx],
@@ -227,10 +227,7 @@ pub(crate) fn resolve(
227227
meta.path_bounds.0 = path_start;
228228

229229
if t_path.0.starts_with('/') {
230-
let path = [t_path.0, t_path.1.unwrap_or("")];
231-
let path = &path[..=t_path.1.is_some() as usize];
232-
233-
let underflow_occurred = remove_dot_segments(&mut buf, path);
230+
let underflow_occurred = remove_dot_segments(&mut buf, t_path.0, t_path.1);
234231
if underflow_occurred && !allow_path_underflow {
235232
return Err(ResolveError::PathUnderflow);
236233
}
@@ -261,30 +258,50 @@ pub(crate) fn resolve(
261258
Ok((buf, meta))
262259
}
263260

264-
pub(crate) fn remove_dot_segments(buf: &mut String, path: &[&str]) -> bool {
265-
debug_assert!(path[0].starts_with('/'));
261+
pub(crate) fn remove_dot_segments(buf: &mut String, abs: &str, rel: Option<&str>) -> bool {
262+
debug_assert!(abs.starts_with('/'));
266263

267264
let min_len = buf.len() + 1;
268-
let mut underflow_occurred = false;
269-
270-
for seg in path.iter().flat_map(|s| s.split_inclusive('/')) {
271-
match classify_segment(seg.strip_suffix('/').unwrap_or(seg)) {
272-
SegKind::Dot => {}
273-
SegKind::DoubleDot => {
274-
if buf.len() > min_len {
275-
let prev_slash_idx = buf.as_bytes()[..buf.len() - 1]
276-
.iter()
277-
.rposition(|&b| b == b'/')
278-
.unwrap();
279-
buf.truncate(prev_slash_idx + 1);
280-
} else {
281-
underflow_occurred = true;
265+
let mut underflow = false;
266+
267+
for part in iter::once(abs).chain(rel) {
268+
let bytes = part.as_bytes();
269+
let len = bytes.len();
270+
271+
let mut start = 0;
272+
while start < len {
273+
// Find next '/' or end.
274+
let mut end = start;
275+
while end < len && bytes[end] != b'/' {
276+
end += 1;
277+
}
278+
let seg = &bytes[start..end];
279+
280+
match classify_segment(seg) {
281+
SegKind::Dot => {}
282+
SegKind::DoubleDot => {
283+
if buf.len() <= min_len {
284+
underflow = true;
285+
} else {
286+
// Truncate to previous segment start.
287+
let prev_slash_idx = buf.as_bytes()[..buf.len() - 1]
288+
.iter()
289+
.rposition(|&b| b == b'/')
290+
.unwrap();
291+
buf.truncate(prev_slash_idx + 1);
292+
}
282293
}
294+
// Append the segment and the following '/' if any.
295+
SegKind::Normal => buf.push_str(&part[start..len.min(end + 1)]),
283296
}
284-
SegKind::Normal => buf.push_str(seg),
297+
298+
if end == len {
299+
break;
300+
}
301+
start = end + 1; // Skip '/'.
285302
}
286303
}
287-
underflow_occurred
304+
underflow
288305
}
289306

290307
enum SegKind {
@@ -293,13 +310,19 @@ enum SegKind {
293310
Normal,
294311
}
295312

296-
fn classify_segment(seg: &str) -> SegKind {
297-
match seg.as_bytes() {
298-
[b'.', rem @ ..] | [b'%', b'2', b'E' | b'e', rem @ ..] => match rem {
299-
[] => SegKind::Dot,
300-
b"." | [b'%', b'2', b'E' | b'e'] => SegKind::DoubleDot,
301-
_ => SegKind::Normal,
302-
},
313+
fn classify_segment(s: &[u8]) -> SegKind {
314+
fn is_pct2e(s: &[u8]) -> bool {
315+
&s[..2] == b"%2" && (s[2] | 0x20) == b'e'
316+
}
317+
318+
match s.len() {
319+
1 if s == b"." => SegKind::Dot,
320+
2 if s == b".." => SegKind::DoubleDot,
321+
3 if is_pct2e(s) => SegKind::Dot,
322+
4 if (s[0] == b'.' && is_pct2e(&s[1..])) || (s[3] == b'.' && is_pct2e(&s[..3])) => {
323+
SegKind::DoubleDot
324+
}
325+
6 if is_pct2e(&s[..3]) && is_pct2e(&s[3..]) => SegKind::DoubleDot,
303326
_ => SegKind::Normal,
304327
}
305328
}

0 commit comments

Comments
 (0)