|
1 | | -use lazy_static::lazy_static; |
2 | | -use regex::bytes::Regex; |
3 | 1 | use serde::Deserialize; |
4 | 2 | use std::{path::{Path, PathBuf}, str::Utf8Error}; |
5 | 3 |
|
@@ -77,18 +75,6 @@ pub enum Error { |
77 | 75 | IOError(#[from] std::io::Error), |
78 | 76 | } |
79 | 77 |
|
80 | | -fn make_io_utf8_error() -> std::io::Error { |
81 | | - std::io::Error::new( |
82 | | - std::io::ErrorKind::InvalidData, |
83 | | - "File did not contain valid UTF-8" |
84 | | - ) |
85 | | -} |
86 | | - |
87 | | -fn io_bytes_to_str(vec: &[u8]) -> Result<&str, std::io::Error> { |
88 | | - std::str::from_utf8(vec) |
89 | | - .map_err(|_| make_io_utf8_error()) |
90 | | -} |
91 | | - |
92 | 78 | #[cfg(feature = "mmap")] |
93 | 79 | pub fn open_zip_via_mmap<P: AsRef<Path>>(p: P) -> Result<Zip<mmap_rs::Mmap>, std::io::Error> { |
94 | 80 | let file = fs::File::open(p)?; |
@@ -176,126 +162,126 @@ where Storage: AsRef<[u8]> + Send + Sync { |
176 | 162 | } |
177 | 163 | } |
178 | 164 |
|
179 | | -fn split_zip(p_bytes: &[u8]) -> (&[u8], Option<&[u8]>) { |
180 | | - lazy_static! { |
181 | | - static ref ZIP_RE: Regex = Regex::new(r"\.zip").unwrap(); |
182 | | - } |
| 165 | +fn vpath(p: &Path) -> std::io::Result<VPath> { |
| 166 | + let Some(p_str) = p.as_os_str().to_str() else { |
| 167 | + return Ok(VPath::Native(p.to_path_buf())); |
| 168 | + }; |
183 | 169 |
|
184 | | - let mut search_offset = 0; |
| 170 | + let normalized_path |
| 171 | + = crate::util::normalize_path(p_str); |
185 | 172 |
|
186 | | - while search_offset < p_bytes.len() { |
187 | | - if let Some(m) = ZIP_RE.find_at(p_bytes, search_offset) { |
188 | | - let idx = m.start(); |
189 | | - let next_char_idx = m.end(); |
190 | | - |
191 | | - if idx == 0 || p_bytes.get(idx - 1) == Some(&b'/') || p_bytes.get(next_char_idx) != Some(&b'/') { |
192 | | - search_offset = next_char_idx; |
193 | | - continue; |
194 | | - } |
195 | | - |
196 | | - let zip_path = &p_bytes[0..next_char_idx]; |
197 | | - let sub_path = p_bytes.get(next_char_idx + 1..); |
| 173 | + // We remove potential leading slashes to avoid __virtual__ accidentally removing them |
| 174 | + let normalized_relative_path |
| 175 | + = normalized_path.strip_prefix('/') |
| 176 | + .unwrap_or(&normalized_path); |
198 | 177 |
|
199 | | - return (zip_path, sub_path); |
200 | | - } else { |
201 | | - break; |
202 | | - } |
| 178 | + let mut segment_it |
| 179 | + = normalized_relative_path.split('/'); |
| 180 | + |
| 181 | + // `split` returns [""] if the path is empty; we need to remove it |
| 182 | + if normalized_relative_path.is_empty() { |
| 183 | + segment_it.next(); |
203 | 184 | } |
204 | 185 |
|
205 | | - (p_bytes, None) |
206 | | -} |
| 186 | + let mut base_items: Vec<&str> |
| 187 | + = Vec::new(); |
207 | 188 |
|
208 | | -fn split_virtual(p_bytes: &[u8]) -> std::io::Result<(usize, Option<(usize, usize)>)> { |
209 | | - lazy_static! { |
210 | | - static ref VIRTUAL_RE: Regex |
211 | | - = Regex::new( |
212 | | - "(?:^|/)((?:\\$\\$virtual|__virtual__)/(?:[^/]+)-[a-f0-9]+/([0-9]+)/)" |
213 | | - ).unwrap(); |
214 | | - } |
| 189 | + let mut virtual_items: Option<Vec<&str>> |
| 190 | + = None; |
| 191 | + let mut internal_items: Option<Vec<&str>> |
| 192 | + = None; |
| 193 | + let mut zip_items: Option<Vec<&str>> |
| 194 | + = None; |
215 | 195 |
|
216 | | - if let Some(m) = VIRTUAL_RE.captures(p_bytes) { |
217 | | - if let (Some(main), Some(depth)) = (m.get(1), m.get(2)) { |
218 | | - if let Ok(depth_n) = str::parse(io_bytes_to_str(depth.as_bytes())?) { |
219 | | - return Ok((main.start(), Some((main.end() - main.start(), depth_n)))); |
220 | | - } |
| 196 | + while let Some(segment) = segment_it.next() { |
| 197 | + if let Some(zip_segments) = &mut zip_items { |
| 198 | + zip_segments.push(segment); |
| 199 | + continue; |
221 | 200 | } |
222 | | - } |
223 | 201 |
|
224 | | - Ok((p_bytes.len(), None)) |
225 | | -} |
| 202 | + if segment == "__virtual__" && virtual_items.is_none() { |
| 203 | + let mut acc_segments |
| 204 | + = Vec::with_capacity(3); |
226 | 205 |
|
227 | | -fn vpath(p: &Path) -> std::io::Result<VPath> { |
228 | | - let p_str = crate::util::normalize_path( |
229 | | - &p.as_os_str() |
230 | | - .to_string_lossy() |
231 | | - ); |
232 | | - |
233 | | - let p_bytes = p_str |
234 | | - .as_bytes().to_vec(); |
235 | | - |
236 | | - let (archive_path_u8, zip_path_u8) |
237 | | - = split_zip(&p_bytes); |
238 | | - let (mut base_path_len, virtual_path_u8) |
239 | | - = split_virtual(archive_path_u8)?; |
240 | | - |
241 | | - let mut base_path_u8 = archive_path_u8; |
242 | | - let mut virtual_segments = None; |
243 | | - |
244 | | - if let Some((mut virtual_len, parent_depth)) = virtual_path_u8 { |
245 | | - for _ in 0..parent_depth { |
246 | | - if base_path_len == 1 { |
247 | | - break; |
| 206 | + acc_segments.push(segment); |
| 207 | + |
| 208 | + // We just skip the arbitrary hash, it doesn't matter what it is |
| 209 | + if let Some(hash_segment) = segment_it.next() { |
| 210 | + acc_segments.push(hash_segment); |
248 | 211 | } |
249 | 212 |
|
250 | | - base_path_len -= 1; |
251 | | - virtual_len += 1; |
| 213 | + // We retrieve the depth |
| 214 | + if let Some(depth_segment) = segment_it.next() { |
| 215 | + let depth = depth_segment |
| 216 | + .parse::<usize>(); |
| 217 | + |
| 218 | + acc_segments.push(depth_segment); |
| 219 | + |
| 220 | + // We extract the backward segments from the base ones |
| 221 | + if let Ok(depth) = depth { |
| 222 | + let parent_segments = base_items |
| 223 | + .split_off(base_items.len().saturating_sub(depth)); |
252 | 224 |
|
253 | | - while let Some(c) = archive_path_u8.get(base_path_len - 1) { |
254 | | - if *c == b'/' { |
255 | | - break; |
256 | | - } else { |
257 | | - base_path_len -= 1; |
258 | | - virtual_len += 1; |
| 225 | + acc_segments.splice(0..0, parent_segments); |
259 | 226 | } |
260 | 227 | } |
| 228 | + |
| 229 | + virtual_items = Some(acc_segments); |
| 230 | + internal_items = Some(vec![]); |
| 231 | + |
| 232 | + continue; |
261 | 233 | } |
262 | 234 |
|
263 | | - if let Some(c) = archive_path_u8.get(base_path_len - 1) { |
264 | | - if *c != b'/' { |
265 | | - return Err(std::io::Error::new(std::io::ErrorKind::Other, "Invalid virtual back-reference")) |
266 | | - } |
267 | | - } else { |
268 | | - return Err(std::io::Error::new(std::io::ErrorKind::Other, "Invalid virtual back-reference")) |
| 235 | + if segment.len() > 4 && segment.ends_with(".zip") { |
| 236 | + zip_items = Some(vec![]); |
269 | 237 | } |
270 | 238 |
|
271 | | - base_path_u8 |
272 | | - = &base_path_u8[0..base_path_len]; |
| 239 | + if let Some(virtual_segments) = &mut virtual_items { |
| 240 | + virtual_segments.push(segment); |
| 241 | + } |
273 | 242 |
|
274 | | - // Trim the trailing slash |
275 | | - if base_path_u8.len() > 1 { |
276 | | - base_path_u8 = &base_path_u8[0..base_path_u8.len() - 1]; |
| 243 | + if let Some(internal_segments) = &mut internal_items { |
| 244 | + internal_segments.push(segment); |
| 245 | + } else { |
| 246 | + base_items.push(segment); |
277 | 247 | } |
| 248 | + } |
278 | 249 |
|
279 | | - virtual_segments = Some(( |
280 | | - io_bytes_to_str(&archive_path_u8[base_path_len..archive_path_u8.len()])?.to_string(), |
281 | | - io_bytes_to_str(&archive_path_u8[base_path_len + virtual_len..archive_path_u8.len()])?.to_string(), |
282 | | - )); |
283 | | - } else if zip_path_u8.is_none() { |
284 | | - return Ok(VPath::Native(PathBuf::from(p_str))); |
| 250 | + let mut base_path = base_items.join("/"); |
| 251 | + |
| 252 | + // Don't forget to add back the leading slash we removed earlier |
| 253 | + if normalized_relative_path != normalized_path { |
| 254 | + base_path.insert(0, '/'); |
285 | 255 | } |
286 | 256 |
|
287 | | - if let Some(zip_path_u8) = zip_path_u8 { |
288 | | - Ok(VPath::Zip(ZipInfo { |
289 | | - base_path: io_bytes_to_str(base_path_u8)?.to_string(), |
290 | | - virtual_segments, |
291 | | - zip_path: io_bytes_to_str(zip_path_u8)?.to_string(), |
292 | | - })) |
293 | | - } else { |
294 | | - Ok(VPath::Virtual(VirtualInfo { |
295 | | - base_path: io_bytes_to_str(base_path_u8)?.to_string(), |
296 | | - virtual_segments: virtual_segments.unwrap(), |
297 | | - })) |
| 257 | + let virtual_info = match (virtual_items, internal_items) { |
| 258 | + (Some(virtual_segments), Some(internal_segments)) => { |
| 259 | + Some((virtual_segments.join("/"), internal_segments.join("/"))) |
| 260 | + } |
| 261 | + |
| 262 | + _ => { |
| 263 | + None |
| 264 | + }, |
| 265 | + }; |
| 266 | + |
| 267 | + if let Some(zip_segments) = zip_items { |
| 268 | + if !zip_segments.is_empty() { |
| 269 | + return Ok(VPath::Zip(ZipInfo { |
| 270 | + base_path, |
| 271 | + virtual_segments: virtual_info, |
| 272 | + zip_path: zip_segments.join("/"), |
| 273 | + })); |
| 274 | + } |
| 275 | + } |
| 276 | + |
| 277 | + if let Some(virtual_info) = virtual_info { |
| 278 | + return Ok(VPath::Virtual(VirtualInfo { |
| 279 | + base_path, |
| 280 | + virtual_segments: virtual_info, |
| 281 | + })); |
298 | 282 | } |
| 283 | + |
| 284 | + Ok(VPath::Native(PathBuf::from(base_path))) |
299 | 285 | } |
300 | 286 |
|
301 | 287 | #[cfg(test)] |
|
0 commit comments