Skip to content

Commit afffff2

Browse files
committed
Rewrites vpath to avoid relying on regexes
1 parent c76391c commit afffff2

File tree

1 file changed

+95
-109
lines changed

1 file changed

+95
-109
lines changed

src/fs.rs

Lines changed: 95 additions & 109 deletions
Original file line numberDiff line numberDiff line change
@@ -1,5 +1,3 @@
1-
use lazy_static::lazy_static;
2-
use regex::bytes::Regex;
31
use serde::Deserialize;
42
use std::{path::{Path, PathBuf}, str::Utf8Error};
53

@@ -77,18 +75,6 @@ pub enum Error {
7775
IOError(#[from] std::io::Error),
7876
}
7977

80-
fn make_io_utf8_error() -> std::io::Error {
81-
std::io::Error::new(
82-
std::io::ErrorKind::InvalidData,
83-
"File did not contain valid UTF-8"
84-
)
85-
}
86-
87-
fn io_bytes_to_str(vec: &[u8]) -> Result<&str, std::io::Error> {
88-
std::str::from_utf8(vec)
89-
.map_err(|_| make_io_utf8_error())
90-
}
91-
9278
#[cfg(feature = "mmap")]
9379
pub fn open_zip_via_mmap<P: AsRef<Path>>(p: P) -> Result<Zip<mmap_rs::Mmap>, std::io::Error> {
9480
let file = fs::File::open(p)?;
@@ -176,126 +162,126 @@ where Storage: AsRef<[u8]> + Send + Sync {
176162
}
177163
}
178164

179-
fn split_zip(p_bytes: &[u8]) -> (&[u8], Option<&[u8]>) {
180-
lazy_static! {
181-
static ref ZIP_RE: Regex = Regex::new(r"\.zip").unwrap();
182-
}
165+
fn vpath(p: &Path) -> std::io::Result<VPath> {
166+
let Some(p_str) = p.as_os_str().to_str() else {
167+
return Ok(VPath::Native(p.to_path_buf()));
168+
};
183169

184-
let mut search_offset = 0;
170+
let normalized_path
171+
= crate::util::normalize_path(p_str);
185172

186-
while search_offset < p_bytes.len() {
187-
if let Some(m) = ZIP_RE.find_at(p_bytes, search_offset) {
188-
let idx = m.start();
189-
let next_char_idx = m.end();
190-
191-
if idx == 0 || p_bytes.get(idx - 1) == Some(&b'/') || p_bytes.get(next_char_idx) != Some(&b'/') {
192-
search_offset = next_char_idx;
193-
continue;
194-
}
195-
196-
let zip_path = &p_bytes[0..next_char_idx];
197-
let sub_path = p_bytes.get(next_char_idx + 1..);
173+
// We strip the leading slash (it is added back at the end) so that a __virtual__ back-reference can't accidentally consume it
174+
let normalized_relative_path
175+
= normalized_path.strip_prefix('/')
176+
.unwrap_or(&normalized_path);
198177

199-
return (zip_path, sub_path);
200-
} else {
201-
break;
202-
}
178+
let mut segment_it
179+
= normalized_relative_path.split('/');
180+
181+
// `split` returns [""] if the path is empty; we need to remove it
182+
if normalized_relative_path.is_empty() {
183+
segment_it.next();
203184
}
204185

205-
(p_bytes, None)
206-
}
186+
let mut base_items: Vec<&str>
187+
= Vec::new();
207188

208-
fn split_virtual(p_bytes: &[u8]) -> std::io::Result<(usize, Option<(usize, usize)>)> {
209-
lazy_static! {
210-
static ref VIRTUAL_RE: Regex
211-
= Regex::new(
212-
"(?:^|/)((?:\\$\\$virtual|__virtual__)/(?:[^/]+)-[a-f0-9]+/([0-9]+)/)"
213-
).unwrap();
214-
}
189+
let mut virtual_items: Option<Vec<&str>>
190+
= None;
191+
let mut internal_items: Option<Vec<&str>>
192+
= None;
193+
let mut zip_items: Option<Vec<&str>>
194+
= None;
215195

216-
if let Some(m) = VIRTUAL_RE.captures(p_bytes) {
217-
if let (Some(main), Some(depth)) = (m.get(1), m.get(2)) {
218-
if let Ok(depth_n) = str::parse(io_bytes_to_str(depth.as_bytes())?) {
219-
return Ok((main.start(), Some((main.end() - main.start(), depth_n))));
220-
}
196+
while let Some(segment) = segment_it.next() {
197+
if let Some(zip_segments) = &mut zip_items {
198+
zip_segments.push(segment);
199+
continue;
221200
}
222-
}
223201

224-
Ok((p_bytes.len(), None))
225-
}
202+
if segment == "__virtual__" && virtual_items.is_none() {
203+
let mut acc_segments
204+
= Vec::with_capacity(3);
226205

227-
fn vpath(p: &Path) -> std::io::Result<VPath> {
228-
let p_str = crate::util::normalize_path(
229-
&p.as_os_str()
230-
.to_string_lossy()
231-
);
232-
233-
let p_bytes = p_str
234-
.as_bytes().to_vec();
235-
236-
let (archive_path_u8, zip_path_u8)
237-
= split_zip(&p_bytes);
238-
let (mut base_path_len, virtual_path_u8)
239-
= split_virtual(archive_path_u8)?;
240-
241-
let mut base_path_u8 = archive_path_u8;
242-
let mut virtual_segments = None;
243-
244-
if let Some((mut virtual_len, parent_depth)) = virtual_path_u8 {
245-
for _ in 0..parent_depth {
246-
if base_path_len == 1 {
247-
break;
206+
acc_segments.push(segment);
207+
208+
// We keep the hash segment as-is without validating it; its actual value doesn't matter
209+
if let Some(hash_segment) = segment_it.next() {
210+
acc_segments.push(hash_segment);
248211
}
249212

250-
base_path_len -= 1;
251-
virtual_len += 1;
213+
// We retrieve the depth
214+
if let Some(depth_segment) = segment_it.next() {
215+
let depth = depth_segment
216+
.parse::<usize>();
217+
218+
acc_segments.push(depth_segment);
219+
220+
// We move the last `depth` base segments to the front of the virtual segments
221+
if let Ok(depth) = depth {
222+
let parent_segments = base_items
223+
.split_off(base_items.len().saturating_sub(depth));
252224

253-
while let Some(c) = archive_path_u8.get(base_path_len - 1) {
254-
if *c == b'/' {
255-
break;
256-
} else {
257-
base_path_len -= 1;
258-
virtual_len += 1;
225+
acc_segments.splice(0..0, parent_segments);
259226
}
260227
}
228+
229+
virtual_items = Some(acc_segments);
230+
internal_items = Some(vec![]);
231+
232+
continue;
261233
}
262234

263-
if let Some(c) = archive_path_u8.get(base_path_len - 1) {
264-
if *c != b'/' {
265-
return Err(std::io::Error::new(std::io::ErrorKind::Other, "Invalid virtual back-reference"))
266-
}
267-
} else {
268-
return Err(std::io::Error::new(std::io::ErrorKind::Other, "Invalid virtual back-reference"))
235+
if segment.len() > 4 && segment.ends_with(".zip") {
236+
zip_items = Some(vec![]);
269237
}
270238

271-
base_path_u8
272-
= &base_path_u8[0..base_path_len];
239+
if let Some(virtual_segments) = &mut virtual_items {
240+
virtual_segments.push(segment);
241+
}
273242

274-
// Trim the trailing slash
275-
if base_path_u8.len() > 1 {
276-
base_path_u8 = &base_path_u8[0..base_path_u8.len() - 1];
243+
if let Some(internal_segments) = &mut internal_items {
244+
internal_segments.push(segment);
245+
} else {
246+
base_items.push(segment);
277247
}
248+
}
278249

279-
virtual_segments = Some((
280-
io_bytes_to_str(&archive_path_u8[base_path_len..archive_path_u8.len()])?.to_string(),
281-
io_bytes_to_str(&archive_path_u8[base_path_len + virtual_len..archive_path_u8.len()])?.to_string(),
282-
));
283-
} else if zip_path_u8.is_none() {
284-
return Ok(VPath::Native(PathBuf::from(p_str)));
250+
let mut base_path = base_items.join("/");
251+
252+
// Don't forget to add back the leading slash we removed earlier
253+
if normalized_relative_path != normalized_path {
254+
base_path.insert(0, '/');
285255
}
286256

287-
if let Some(zip_path_u8) = zip_path_u8 {
288-
Ok(VPath::Zip(ZipInfo {
289-
base_path: io_bytes_to_str(base_path_u8)?.to_string(),
290-
virtual_segments,
291-
zip_path: io_bytes_to_str(zip_path_u8)?.to_string(),
292-
}))
293-
} else {
294-
Ok(VPath::Virtual(VirtualInfo {
295-
base_path: io_bytes_to_str(base_path_u8)?.to_string(),
296-
virtual_segments: virtual_segments.unwrap(),
297-
}))
257+
let virtual_info = match (virtual_items, internal_items) {
258+
(Some(virtual_segments), Some(internal_segments)) => {
259+
Some((virtual_segments.join("/"), internal_segments.join("/")))
260+
}
261+
262+
_ => {
263+
None
264+
},
265+
};
266+
267+
if let Some(zip_segments) = zip_items {
268+
if !zip_segments.is_empty() {
269+
return Ok(VPath::Zip(ZipInfo {
270+
base_path,
271+
virtual_segments: virtual_info,
272+
zip_path: zip_segments.join("/"),
273+
}));
274+
}
275+
}
276+
277+
if let Some(virtual_info) = virtual_info {
278+
return Ok(VPath::Virtual(VirtualInfo {
279+
base_path,
280+
virtual_segments: virtual_info,
281+
}));
298282
}
283+
284+
Ok(VPath::Native(PathBuf::from(base_path)))
299285
}
300286

301287
#[cfg(test)]

0 commit comments

Comments
 (0)