Skip to content

Commit cc46bd9

Browse files
committed
Improve performance of reference resolution
Allocate only once when resolving. Remove unnecessary operations.
1 parent 9336c89 commit cc46bd9

File tree

2 files changed

+51
-58
lines changed

2 files changed

+51
-58
lines changed

src/normalize.rs

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -21,7 +21,7 @@ pub(crate) fn normalize(r: RmrRef<'_, '_>, ascii_only: bool) -> (String, Meta) {
2121

2222
if r.has_scheme() && path.starts_with('/') {
2323
normalize_estr(&mut buf, path, false, ascii_only, false);
24-
resolve::remove_dot_segments(&mut path_buf, &buf, true).unwrap();
24+
resolve::remove_dot_segments(&mut path_buf, 0, &[&buf]);
2525
buf.clear();
2626
} else {
2727
// Don't remove dot segments from relative reference or rootless path.

src/resolve.rs

Lines changed: 50 additions & 57 deletions
Original file line numberDiff line numberDiff line change
@@ -121,7 +121,6 @@ pub(crate) fn resolve(
121121
}
122122

123123
let (t_scheme, t_authority, t_path, t_query, t_fragment);
124-
let mut buf = String::new();
125124

126125
let r_scheme = r.scheme_opt();
127126
let r_authority = r.authority();
@@ -132,57 +131,43 @@ pub(crate) fn resolve(
132131
if let Some(r_scheme) = r_scheme {
133132
t_scheme = r_scheme;
134133
t_authority = r_authority;
135-
t_path = if r_path.is_absolute() {
136-
buf.reserve_exact(r_path.len());
137-
remove_dot_segments(&mut buf, r_path.as_str(), allow_path_underflow)?
138-
} else {
139-
r_path.as_str()
140-
};
134+
t_path = (r_path.as_str(), None);
141135
t_query = r_query;
142136
} else {
143137
if r_authority.is_some() {
144138
t_authority = r_authority;
145-
buf.reserve_exact(r_path.len());
146-
t_path = remove_dot_segments(&mut buf, r_path.as_str(), allow_path_underflow)?;
139+
t_path = (r_path.as_str(), None);
147140
t_query = r_query;
148141
} else {
149142
if r_path.is_empty() {
150-
let base_path = base.path();
151-
t_path = if base_path.is_absolute() {
152-
buf.reserve_exact(base_path.len());
153-
remove_dot_segments(&mut buf, base_path.as_str(), allow_path_underflow)?
154-
} else {
155-
base_path.as_str()
156-
};
143+
t_path = (base.path().as_str(), None);
157144
if r_query.is_some() {
158145
t_query = r_query;
159146
} else {
160147
t_query = base.query();
161148
}
162149
} else {
163150
if r_path.is_absolute() {
164-
buf.reserve_exact(r_path.len());
165-
t_path = remove_dot_segments(&mut buf, r_path.as_str(), allow_path_underflow)?;
151+
t_path = (r_path.as_str(), None);
166152
} else {
167-
// Instead of merging the paths, remove dot segments incrementally.
168-
let base_path = base.path().as_str();
169-
if base_path.is_empty() {
170-
buf.reserve_exact(r_path.len() + 1);
171-
buf.push('/');
153+
let base_path = base.path();
154+
let base_path = if base_path.is_empty() {
155+
"/"
172156
} else {
173-
// Make sure that swapping the order of resolution and normalization
174-
// does not change the result.
175-
let last_slash_i = base_path.rfind('/').unwrap();
176-
let last_seg = &base_path[last_slash_i + 1..];
177-
let base_path_stripped = match classify_segment(last_seg) {
178-
SegKind::DoubleDot => base_path,
179-
_ => &base_path[..=last_slash_i],
180-
};
157+
base_path.as_str()
158+
};
181159

182-
buf.reserve_exact(base_path_stripped.len() + r_path.len());
183-
remove_dot_segments(&mut buf, base_path_stripped, allow_path_underflow)?;
184-
}
185-
t_path = remove_dot_segments(&mut buf, r_path.as_str(), allow_path_underflow)?;
160+
// Make sure that swapping the order of resolution and normalization
161+
// does not change the result.
162+
let last_slash_i = base_path.rfind('/').unwrap();
163+
let last_seg = &base_path[last_slash_i + 1..];
164+
let base_path_stripped = match classify_segment(last_seg) {
165+
SegKind::DoubleDot => base_path,
166+
_ => &base_path[..=last_slash_i],
167+
};
168+
169+
// Instead of merging the paths, remove dot segments incrementally.
170+
t_path = (base_path_stripped, Some(r_path.as_str()));
186171
}
187172
t_query = r_query;
188173
}
@@ -197,10 +182,7 @@ pub(crate) fn resolve(
197182
if let Some(authority) = t_authority {
198183
len += authority.as_str().len() + 2;
199184
}
200-
if t_authority.is_none() && t_path.starts_with("//") {
201-
len += 2;
202-
}
203-
len += t_path.len();
185+
len += t_path.0.len() + t_path.1.map_or(0, |s| s.len());
204186
if let Some(query) = t_query {
205187
len += query.len() + 1;
206188
}
@@ -226,12 +208,27 @@ pub(crate) fn resolve(
226208
meta.auth_meta = Some(auth_meta);
227209
}
228210

229-
meta.path_bounds.0 = buf.len();
211+
let path_start = buf.len();
212+
meta.path_bounds.0 = path_start;
213+
214+
if t_path.0.starts_with('/') {
215+
let path = match t_path.1 {
216+
Some(s) => &[t_path.0, s][..],
217+
None => &[t_path.0],
218+
};
219+
let underflow_occurred = remove_dot_segments(&mut buf, path_start, path);
220+
if underflow_occurred && !allow_path_underflow {
221+
return Err(ResolveError::PathUnderflow);
222+
}
223+
} else {
224+
buf.push_str(t_path.0);
225+
}
226+
230227
// Close the loophole in the original algorithm.
231-
if t_authority.is_none() && t_path.starts_with("//") {
232-
buf.push_str("/.");
228+
if t_authority.is_none() && buf[path_start..].starts_with("//") {
229+
buf.insert_str(path_start, "/.");
233230
}
234-
buf.push_str(t_path);
231+
235232
meta.path_bounds.1 = buf.len();
236233

237234
if let Some(query) = t_query {
@@ -245,32 +242,28 @@ pub(crate) fn resolve(
245242
buf.push_str(fragment.as_str());
246243
}
247244

248-
debug_assert_eq!(buf.len(), len);
245+
debug_assert!(buf.len() <= len);
249246

250247
Ok((buf, meta))
251248
}
252249

253-
pub(crate) fn remove_dot_segments<'a>(
254-
buf: &'a mut String,
255-
path: &str,
256-
allow_path_underflow: bool,
257-
) -> Result<&'a str, ResolveError> {
258-
for seg in path.split_inclusive('/') {
250+
pub(crate) fn remove_dot_segments(buf: &mut String, start: usize, path: &[&str]) -> bool {
251+
let mut underflow_occurred = false;
252+
for seg in path.iter().flat_map(|s| s.split_inclusive('/')) {
259253
let seg_stripped = seg.strip_suffix('/').unwrap_or(seg);
260254
match classify_segment(seg_stripped) {
261-
SegKind::Dot => buf.truncate(buf.rfind('/').unwrap() + 1),
255+
SegKind::Dot => {}
262256
SegKind::DoubleDot => {
263-
if buf.len() != 1 {
264-
buf.truncate(buf.rfind('/').unwrap());
265-
buf.truncate(buf.rfind('/').unwrap() + 1);
266-
} else if !allow_path_underflow {
267-
return Err(ResolveError::PathUnderflow);
257+
if buf.len() > start + 1 {
258+
buf.truncate(buf[..buf.len() - 1].rfind('/').unwrap() + 1);
259+
} else {
260+
underflow_occurred = true;
268261
}
269262
}
270263
SegKind::Normal => buf.push_str(seg),
271264
}
272265
}
273-
Ok(buf)
266+
underflow_occurred
274267
}
275268

276269
enum SegKind {

0 commit comments

Comments
 (0)