diff --git a/Cargo.lock b/Cargo.lock index 5b6901e..2b981d7 100644 --- a/Cargo.lock +++ b/Cargo.lock @@ -532,7 +532,6 @@ dependencies = [ "path-slash", "pathdiff", "radix_trie", - "regex", "rstest", "serde", "serde_json", diff --git a/Cargo.toml b/Cargo.toml index 0302f8e..ccca207 100644 --- a/Cargo.toml +++ b/Cargo.toml @@ -12,7 +12,7 @@ arca = "^0.7" byteorder = "1" clean-path = "0.2.1" concurrent_lru = "^0.2" -fancy-regex = "^0.13.0" +fancy-regex = { version = "^0.13.0", default-features = false } indexmap = { version = "2.7.1", features = ["serde"] } lazy_static = "1" miniz_oxide = "^0.7" @@ -20,7 +20,6 @@ mmap-rs = { version = "^0.6", optional = true } path-slash = "0.2.1" pathdiff = "^0.2" radix_trie = "0.2.1" -regex = "1" serde = { version = "1", features = ["derive"] } serde_json = "1" serde_with = { version = "3", features = ["indexmap_2"] } diff --git a/src/fs.rs b/src/fs.rs index cb6e0bf..74fc257 100644 --- a/src/fs.rs +++ b/src/fs.rs @@ -1,5 +1,3 @@ -use lazy_static::lazy_static; -use regex::bytes::Regex; use serde::Deserialize; use std::{path::{Path, PathBuf}, str::Utf8Error}; @@ -77,18 +75,6 @@ pub enum Error { IOError(#[from] std::io::Error), } -fn make_io_utf8_error() -> std::io::Error { - std::io::Error::new( - std::io::ErrorKind::InvalidData, - "File did not contain valid UTF-8" - ) -} - -fn io_bytes_to_str(vec: &[u8]) -> Result<&str, std::io::Error> { - std::str::from_utf8(vec) - .map_err(|_| make_io_utf8_error()) -} - #[cfg(feature = "mmap")] pub fn open_zip_via_mmap>(p: P) -> Result, std::io::Error> { let file = fs::File::open(p)?; @@ -176,126 +162,126 @@ where Storage: AsRef<[u8]> + Send + Sync { } } -fn split_zip(p_bytes: &[u8]) -> (&[u8], Option<&[u8]>) { - lazy_static! { - static ref ZIP_RE: Regex = Regex::new(r"\.zip").unwrap(); - } +fn vpath(p: &Path) -> std::io::Result { + let Some(p_str) = p.as_os_str().to_str() else { + return Ok(VPath::Native(p.to_path_buf())); + }; - let mut search_offset = 0; + let normalized_path + = crate::util::normalize_path(p_str); - while search_offset < p_bytes.len() { - if let Some(m) = ZIP_RE.find_at(p_bytes, search_offset) { - let idx = m.start(); - let next_char_idx = m.end(); - - if idx == 0 || p_bytes.get(idx - 1) == Some(&b'/') || p_bytes.get(next_char_idx) != Some(&b'/') { - search_offset = next_char_idx; - continue; - } - - let zip_path = &p_bytes[0..next_char_idx]; - let sub_path = p_bytes.get(next_char_idx + 1..); + // We remove potential leading slashes to avoid __virtual__ accidentally removing them + let normalized_relative_path + = normalized_path.strip_prefix('/') + .unwrap_or(&normalized_path); - return (zip_path, sub_path); - } else { - break; - } + let mut segment_it + = normalized_relative_path.split('/'); + + // `split` returns [""] if the path is empty; we need to remove it + if normalized_relative_path.is_empty() { + segment_it.next(); } - (p_bytes, None) -} + let mut base_items: Vec<&str> + = Vec::new(); -fn split_virtual(p_bytes: &[u8]) -> std::io::Result<(usize, Option<(usize, usize)>)> { - lazy_static! { - static ref VIRTUAL_RE: Regex - = Regex::new( - "(?:^|/)((?:\\$\\$virtual|__virtual__)/(?:[^/]+)-[a-f0-9]+/([0-9]+)/)" - ).unwrap(); - } + let mut virtual_items: Option> + = None; + let mut internal_items: Option> + = None; + let mut zip_items: Option> + = None; - if let Some(m) = VIRTUAL_RE.captures(p_bytes) { - if let (Some(main), Some(depth)) = (m.get(1), m.get(2)) { - if let Ok(depth_n) = str::parse(io_bytes_to_str(depth.as_bytes())?) { - return Ok((main.start(), Some((main.end() - main.start(), depth_n)))); - } + while let Some(segment) = segment_it.next() { + if let Some(zip_segments) = &mut zip_items { + zip_segments.push(segment); + continue; } - } - Ok((p_bytes.len(), None)) -} + if segment == "__virtual__" && virtual_items.is_none() { + let mut acc_segments + = Vec::with_capacity(3); -fn vpath(p: &Path) -> std::io::Result { - let p_str = crate::util::normalize_path( - &p.as_os_str() - .to_string_lossy() - ); - - let p_bytes = p_str - .as_bytes().to_vec(); - - let (archive_path_u8, zip_path_u8) - = split_zip(&p_bytes); - let (mut base_path_len, virtual_path_u8) - = split_virtual(archive_path_u8)?; - - let mut base_path_u8 = archive_path_u8; - let mut virtual_segments = None; - - if let Some((mut virtual_len, parent_depth)) = virtual_path_u8 { - for _ in 0..parent_depth { - if base_path_len == 1 { - break; + acc_segments.push(segment); + + // We just skip the arbitrary hash, it doesn't matter what it is + if let Some(hash_segment) = segment_it.next() { + acc_segments.push(hash_segment); } - base_path_len -= 1; - virtual_len += 1; + // We retrieve the depth + if let Some(depth_segment) = segment_it.next() { + let depth = depth_segment + .parse::(); + + acc_segments.push(depth_segment); + + // We extract the backward segments from the base ones + if let Ok(depth) = depth { + let parent_segments = base_items + .split_off(base_items.len().saturating_sub(depth)); - while let Some(c) = archive_path_u8.get(base_path_len - 1) { - if *c == b'/' { - break; - } else { - base_path_len -= 1; - virtual_len += 1; + acc_segments.splice(0..0, parent_segments); } } + + virtual_items = Some(acc_segments); + internal_items = Some(vec![]); + + continue; } - if let Some(c) = archive_path_u8.get(base_path_len - 1) { - if *c != b'/' { - return Err(std::io::Error::new(std::io::ErrorKind::Other, "Invalid virtual back-reference")) - } - } else { - return Err(std::io::Error::new(std::io::ErrorKind::Other, "Invalid virtual back-reference")) + if segment.len() > 4 && segment.ends_with(".zip") { + zip_items = Some(vec![]); } - base_path_u8 - = &base_path_u8[0..base_path_len]; + if let Some(virtual_segments) = &mut virtual_items { + virtual_segments.push(segment); + } - // Trim the trailing slash - if base_path_u8.len() > 1 { - base_path_u8 = &base_path_u8[0..base_path_u8.len() - 1]; + if let Some(internal_segments) = &mut internal_items { + internal_segments.push(segment); + } else { + base_items.push(segment); } + } - virtual_segments = Some(( - io_bytes_to_str(&archive_path_u8[base_path_len..archive_path_u8.len()])?.to_string(), - io_bytes_to_str(&archive_path_u8[base_path_len + virtual_len..archive_path_u8.len()])?.to_string(), - )); - } else if zip_path_u8.is_none() { - return Ok(VPath::Native(PathBuf::from(p_str))); + let mut base_path = base_items.join("/"); + + // Don't forget to add back the leading slash we removed earlier + if normalized_relative_path != normalized_path { + base_path.insert(0, '/'); } - if let Some(zip_path_u8) = zip_path_u8 { - Ok(VPath::Zip(ZipInfo { - base_path: io_bytes_to_str(base_path_u8)?.to_string(), - virtual_segments, - zip_path: io_bytes_to_str(zip_path_u8)?.to_string(), - })) - } else { - Ok(VPath::Virtual(VirtualInfo { - base_path: io_bytes_to_str(base_path_u8)?.to_string(), - virtual_segments: virtual_segments.unwrap(), - })) + let virtual_info = match (virtual_items, internal_items) { + (Some(virtual_segments), Some(internal_segments)) => { + Some((virtual_segments.join("/"), internal_segments.join("/"))) + } + + _ => { + None + }, + }; + + if let Some(zip_segments) = zip_items { + if !zip_segments.is_empty() { + return Ok(VPath::Zip(ZipInfo { + base_path, + virtual_segments: virtual_info, + zip_path: zip_segments.join("/"), + })); + } + } + + if let Some(virtual_info) = virtual_info { + return Ok(VPath::Virtual(VirtualInfo { + base_path, + virtual_segments: virtual_info, + })); } + + Ok(VPath::Native(PathBuf::from(base_path))) } #[cfg(test)] diff --git a/src/lib.rs b/src/lib.rs index f399c9a..5017a7f 100644 --- a/src/lib.rs +++ b/src/lib.rs @@ -238,7 +238,7 @@ pub fn load_pnp_manifest>(p: P) -> Result { })?; lazy_static! { - static ref RE: Regex = Regex::new("(const\\s+RAW_RUNTIME_STATE\\s*=\\s*|hydrateRuntimeState\\(JSON\\.parse\\()'").unwrap(); + static ref RE: Regex = Regex::new("(const[ \\n]+RAW_RUNTIME_STATE[ \\n]*=[ \\n]*|hydrateRuntimeState\\(JSON\\.parse\\()'").unwrap(); } let manifest_match = RE.find(&manifest_content)