|
1 |
| -use lazy_static::lazy_static; |
2 |
| -use regex::bytes::Regex; |
3 | 1 | use serde::Deserialize;
|
4 | 2 | use std::{path::{Path, PathBuf}, str::Utf8Error};
|
5 | 3 |
|
@@ -77,18 +75,6 @@ pub enum Error {
|
77 | 75 | IOError(#[from] std::io::Error),
|
78 | 76 | }
|
79 | 77 |
|
80 |
| -fn make_io_utf8_error() -> std::io::Error { |
81 |
| - std::io::Error::new( |
82 |
| - std::io::ErrorKind::InvalidData, |
83 |
| - "File did not contain valid UTF-8" |
84 |
| - ) |
85 |
| -} |
86 |
| - |
87 |
| -fn io_bytes_to_str(vec: &[u8]) -> Result<&str, std::io::Error> { |
88 |
| - std::str::from_utf8(vec) |
89 |
| - .map_err(|_| make_io_utf8_error()) |
90 |
| -} |
91 |
| - |
92 | 78 | #[cfg(feature = "mmap")]
|
93 | 79 | pub fn open_zip_via_mmap<P: AsRef<Path>>(p: P) -> Result<Zip<mmap_rs::Mmap>, std::io::Error> {
|
94 | 80 | let file = fs::File::open(p)?;
|
@@ -176,126 +162,126 @@ where Storage: AsRef<[u8]> + Send + Sync {
|
176 | 162 | }
|
177 | 163 | }
|
178 | 164 |
|
179 |
| -fn split_zip(p_bytes: &[u8]) -> (&[u8], Option<&[u8]>) { |
180 |
| - lazy_static! { |
181 |
| - static ref ZIP_RE: Regex = Regex::new(r"\.zip").unwrap(); |
182 |
| - } |
| 165 | +fn vpath(p: &Path) -> std::io::Result<VPath> { |
| 166 | + let Some(p_str) = p.as_os_str().to_str() else { |
| 167 | + return Ok(VPath::Native(p.to_path_buf())); |
| 168 | + }; |
183 | 169 |
|
184 |
| - let mut search_offset = 0; |
| 170 | + let normalized_path |
| 171 | + = crate::util::normalize_path(p_str); |
185 | 172 |
|
186 |
| - while search_offset < p_bytes.len() { |
187 |
| - if let Some(m) = ZIP_RE.find_at(p_bytes, search_offset) { |
188 |
| - let idx = m.start(); |
189 |
| - let next_char_idx = m.end(); |
190 |
| - |
191 |
| - if idx == 0 || p_bytes.get(idx - 1) == Some(&b'/') || p_bytes.get(next_char_idx) != Some(&b'/') { |
192 |
| - search_offset = next_char_idx; |
193 |
| - continue; |
194 |
| - } |
195 |
| - |
196 |
| - let zip_path = &p_bytes[0..next_char_idx]; |
197 |
| - let sub_path = p_bytes.get(next_char_idx + 1..); |
| 173 | + // We remove potential leading slashes to avoid __virtual__ accidentally removing them |
| 174 | + let normalized_relative_path |
| 175 | + = normalized_path.strip_prefix('/') |
| 176 | + .unwrap_or(&normalized_path); |
198 | 177 |
|
199 |
| - return (zip_path, sub_path); |
200 |
| - } else { |
201 |
| - break; |
202 |
| - } |
| 178 | + let mut segment_it |
| 179 | + = normalized_relative_path.split('/'); |
| 180 | + |
| 181 | + // `split` returns [""] if the path is empty; we need to remove it |
| 182 | + if normalized_relative_path.is_empty() { |
| 183 | + segment_it.next(); |
203 | 184 | }
|
204 | 185 |
|
205 |
| - (p_bytes, None) |
206 |
| -} |
| 186 | + let mut base_items: Vec<&str> |
| 187 | + = Vec::new(); |
207 | 188 |
|
208 |
| -fn split_virtual(p_bytes: &[u8]) -> std::io::Result<(usize, Option<(usize, usize)>)> { |
209 |
| - lazy_static! { |
210 |
| - static ref VIRTUAL_RE: Regex |
211 |
| - = Regex::new( |
212 |
| - "(?:^|/)((?:\\$\\$virtual|__virtual__)/(?:[^/]+)-[a-f0-9]+/([0-9]+)/)" |
213 |
| - ).unwrap(); |
214 |
| - } |
| 189 | + let mut virtual_items: Option<Vec<&str>> |
| 190 | + = None; |
| 191 | + let mut internal_items: Option<Vec<&str>> |
| 192 | + = None; |
| 193 | + let mut zip_items: Option<Vec<&str>> |
| 194 | + = None; |
215 | 195 |
|
216 |
| - if let Some(m) = VIRTUAL_RE.captures(p_bytes) { |
217 |
| - if let (Some(main), Some(depth)) = (m.get(1), m.get(2)) { |
218 |
| - if let Ok(depth_n) = str::parse(io_bytes_to_str(depth.as_bytes())?) { |
219 |
| - return Ok((main.start(), Some((main.end() - main.start(), depth_n)))); |
220 |
| - } |
| 196 | + while let Some(segment) = segment_it.next() { |
| 197 | + if let Some(zip_segments) = &mut zip_items { |
| 198 | + zip_segments.push(segment); |
| 199 | + continue; |
221 | 200 | }
|
222 |
| - } |
223 | 201 |
|
224 |
| - Ok((p_bytes.len(), None)) |
225 |
| -} |
| 202 | + if segment == "__virtual__" && virtual_items.is_none() { |
| 203 | + let mut acc_segments |
| 204 | + = Vec::with_capacity(3); |
226 | 205 |
|
227 |
| -fn vpath(p: &Path) -> std::io::Result<VPath> { |
228 |
| - let p_str = crate::util::normalize_path( |
229 |
| - &p.as_os_str() |
230 |
| - .to_string_lossy() |
231 |
| - ); |
232 |
| - |
233 |
| - let p_bytes = p_str |
234 |
| - .as_bytes().to_vec(); |
235 |
| - |
236 |
| - let (archive_path_u8, zip_path_u8) |
237 |
| - = split_zip(&p_bytes); |
238 |
| - let (mut base_path_len, virtual_path_u8) |
239 |
| - = split_virtual(archive_path_u8)?; |
240 |
| - |
241 |
| - let mut base_path_u8 = archive_path_u8; |
242 |
| - let mut virtual_segments = None; |
243 |
| - |
244 |
| - if let Some((mut virtual_len, parent_depth)) = virtual_path_u8 { |
245 |
| - for _ in 0..parent_depth { |
246 |
| - if base_path_len == 1 { |
247 |
| - break; |
| 206 | + acc_segments.push(segment); |
| 207 | + |
| 208 | + // We just skip the arbitrary hash, it doesn't matter what it is |
| 209 | + if let Some(hash_segment) = segment_it.next() { |
| 210 | + acc_segments.push(hash_segment); |
248 | 211 | }
|
249 | 212 |
|
250 |
| - base_path_len -= 1; |
251 |
| - virtual_len += 1; |
| 213 | + // We retrieve the depth |
| 214 | + if let Some(depth_segment) = segment_it.next() { |
| 215 | + let depth = depth_segment |
| 216 | + .parse::<usize>(); |
| 217 | + |
| 218 | + acc_segments.push(depth_segment); |
| 219 | + |
| 220 | + // We extract the backward segments from the base ones |
| 221 | + if let Ok(depth) = depth { |
| 222 | + let parent_segments = base_items |
| 223 | + .split_off(base_items.len().saturating_sub(depth)); |
252 | 224 |
|
253 |
| - while let Some(c) = archive_path_u8.get(base_path_len - 1) { |
254 |
| - if *c == b'/' { |
255 |
| - break; |
256 |
| - } else { |
257 |
| - base_path_len -= 1; |
258 |
| - virtual_len += 1; |
| 225 | + acc_segments.splice(0..0, parent_segments); |
259 | 226 | }
|
260 | 227 | }
|
| 228 | + |
| 229 | + virtual_items = Some(acc_segments); |
| 230 | + internal_items = Some(vec![]); |
| 231 | + |
| 232 | + continue; |
261 | 233 | }
|
262 | 234 |
|
263 |
| - if let Some(c) = archive_path_u8.get(base_path_len - 1) { |
264 |
| - if *c != b'/' { |
265 |
| - return Err(std::io::Error::new(std::io::ErrorKind::Other, "Invalid virtual back-reference")) |
266 |
| - } |
267 |
| - } else { |
268 |
| - return Err(std::io::Error::new(std::io::ErrorKind::Other, "Invalid virtual back-reference")) |
| 235 | + if segment.len() > 4 && segment.ends_with(".zip") { |
| 236 | + zip_items = Some(vec![]); |
269 | 237 | }
|
270 | 238 |
|
271 |
| - base_path_u8 |
272 |
| - = &base_path_u8[0..base_path_len]; |
| 239 | + if let Some(virtual_segments) = &mut virtual_items { |
| 240 | + virtual_segments.push(segment); |
| 241 | + } |
273 | 242 |
|
274 |
| - // Trim the trailing slash |
275 |
| - if base_path_u8.len() > 1 { |
276 |
| - base_path_u8 = &base_path_u8[0..base_path_u8.len() - 1]; |
| 243 | + if let Some(internal_segments) = &mut internal_items { |
| 244 | + internal_segments.push(segment); |
| 245 | + } else { |
| 246 | + base_items.push(segment); |
277 | 247 | }
|
| 248 | + } |
278 | 249 |
|
279 |
| - virtual_segments = Some(( |
280 |
| - io_bytes_to_str(&archive_path_u8[base_path_len..archive_path_u8.len()])?.to_string(), |
281 |
| - io_bytes_to_str(&archive_path_u8[base_path_len + virtual_len..archive_path_u8.len()])?.to_string(), |
282 |
| - )); |
283 |
| - } else if zip_path_u8.is_none() { |
284 |
| - return Ok(VPath::Native(PathBuf::from(p_str))); |
| 250 | + let mut base_path = base_items.join("/"); |
| 251 | + |
| 252 | + // Don't forget to add back the leading slash we removed earlier |
| 253 | + if normalized_relative_path != normalized_path { |
| 254 | + base_path.insert(0, '/'); |
285 | 255 | }
|
286 | 256 |
|
287 |
| - if let Some(zip_path_u8) = zip_path_u8 { |
288 |
| - Ok(VPath::Zip(ZipInfo { |
289 |
| - base_path: io_bytes_to_str(base_path_u8)?.to_string(), |
290 |
| - virtual_segments, |
291 |
| - zip_path: io_bytes_to_str(zip_path_u8)?.to_string(), |
292 |
| - })) |
293 |
| - } else { |
294 |
| - Ok(VPath::Virtual(VirtualInfo { |
295 |
| - base_path: io_bytes_to_str(base_path_u8)?.to_string(), |
296 |
| - virtual_segments: virtual_segments.unwrap(), |
297 |
| - })) |
| 257 | + let virtual_info = match (virtual_items, internal_items) { |
| 258 | + (Some(virtual_segments), Some(internal_segments)) => { |
| 259 | + Some((virtual_segments.join("/"), internal_segments.join("/"))) |
| 260 | + } |
| 261 | + |
| 262 | + _ => { |
| 263 | + None |
| 264 | + }, |
| 265 | + }; |
| 266 | + |
| 267 | + if let Some(zip_segments) = zip_items { |
| 268 | + if !zip_segments.is_empty() { |
| 269 | + return Ok(VPath::Zip(ZipInfo { |
| 270 | + base_path, |
| 271 | + virtual_segments: virtual_info, |
| 272 | + zip_path: zip_segments.join("/"), |
| 273 | + })); |
| 274 | + } |
| 275 | + } |
| 276 | + |
| 277 | + if let Some(virtual_info) = virtual_info { |
| 278 | + return Ok(VPath::Virtual(VirtualInfo { |
| 279 | + base_path, |
| 280 | + virtual_segments: virtual_info, |
| 281 | + })); |
298 | 282 | }
|
| 283 | + |
| 284 | + Ok(VPath::Native(PathBuf::from(base_path))) |
299 | 285 | }
|
300 | 286 |
|
301 | 287 | #[cfg(test)]
|
|
0 commit comments