Skip to content

Commit 0ed518c

Browse files
committed
Exposes more data in the VPath split
1 parent fd4f5d0 commit 0ed518c

File tree

3 files changed

+154
-71
lines changed

3 files changed

+154
-71
lines changed

Cargo.lock

Lines changed: 7 additions & 0 deletions
Some generated files are not rendered by default. Learn more about customizing how changed files appear on GitHub.

Cargo.toml

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -8,6 +8,7 @@ homepage = "https://yarnpkg.com"
88
repository = "https://github.com/yarnpkg/berry/"
99

1010
[dependencies]
11+
anyhow = "1.0.70"
1112
arca = "0.1.3"
1213
fancy-regex = "0.11.0"
1314
lazy_static = "1.4.0"

src/fs.rs

Lines changed: 146 additions & 71 deletions
Original file line numberDiff line numberDiff line change
@@ -5,13 +5,19 @@ use serde::Deserialize;
55
use std::{path::{Path, PathBuf}, fs, io::{BufReader, Read}, collections::{HashSet, HashMap}, str::Utf8Error, num::NonZeroUsize};
66
use zip::{ZipArchive, result::ZipError};
77

8-
#[derive(Clone)]
9-
#[derive(Debug)]
10-
#[derive(Deserialize)]
11-
#[derive(PartialEq)]
8+
#[derive(Clone, Debug, Deserialize, PartialEq)]
9+
#[serde(rename_all = "camelCase")]
10+
pub struct VPathInfo {
11+
pub base_path: String,
12+
pub virtual_segments: Option<(String, String)>,
13+
pub zip_path: Option<String>,
14+
}
15+
16+
17+
#[derive(Clone, Debug, Deserialize, PartialEq)]
1218
#[serde(untagged)]
1319
pub enum VPath {
14-
Zip(PathBuf, String),
20+
Virtual(VPathInfo),
1521
Native(PathBuf),
1622
}
1723

@@ -201,76 +207,109 @@ impl ZipCache for LruZipCache {
201207
}
202208
}
203209

204-
pub fn vpath(p: &Path) -> Result<VPath, std::io::Error> {
210+
pub fn split_zip(p_bytes: &[u8]) -> (&[u8], Option<&[u8]>) {
205211
lazy_static! {
206-
// $0: full path
207-
// $1: virtual folder
208-
// $2: virtual segment
209-
// $3: hash
210-
// $4: depth
211-
// $5: subpath
212-
static ref VIRTUAL_RE: Regex = Regex::new("(/?(?:[^/]+/)*?)(?:\\$\\$virtual|__virtual__)((?:/((?:[^/]+-)?[a-f0-9]+)(?:/([^/]+))?)?((?:/.*)?))$").unwrap();
213-
static ref ZIP_RE: Regex = Regex::new("\\.zip").unwrap();
212+
static ref ZIP_RE: Regex = Regex::new(r"\.zip").unwrap();
214213
}
215214

216-
let mut p_str = p.as_os_str()
217-
.to_string_lossy()
218-
.to_string();
215+
let mut search_offset = 0;
219216

220-
let mut p_bytes = arca::path::normalize_path(p_str.clone())
221-
.as_bytes().to_vec();
217+
while search_offset < p_bytes.len() {
218+
if let Some(m) = ZIP_RE.find_at(&p_bytes, search_offset) {
219+
let idx = m.start();
220+
let next_char_idx = m.end();
221+
222+
if idx == 0 || p_bytes.get(idx - 1) == Some(&b'/') || p_bytes.get(next_char_idx) != Some(&b'/') {
223+
search_offset = next_char_idx;
224+
continue;
225+
}
226+
227+
let zip_path = &p_bytes[0..next_char_idx + 1];
228+
let sub_path = p_bytes.get(next_char_idx + 1..);
229+
230+
return (zip_path, sub_path);
231+
} else {
232+
break;
233+
}
234+
}
235+
236+
(p_bytes, None)
237+
}
238+
239+
pub fn split_virtual(p_bytes: &[u8]) -> std::io::Result<(usize, Option<(usize, usize)>)> {
240+
lazy_static! {
241+
static ref VIRTUAL_RE: Regex = Regex::new("(?:^|/)((?:\\$\\$virtual|__virtual__)/[a-f0-9]+/([0-9]+)/)").unwrap();
242+
}
222243

223244
if let Some(m) = VIRTUAL_RE.captures(&p_bytes) {
224-
if let (Some(target), Some(depth), Some(subpath)) = (m.get(1), m.get(4), m.get(5)) {
245+
if let (Some(main), Some(depth)) = (m.get(1), m.get(2)) {
225246
if let Ok(depth_n) = str::parse(io_bytes_to_str(&depth.as_bytes())?) {
226-
let bytes = [
227-
&target.as_bytes(),
228-
&b"../".repeat(depth_n)[0..],
229-
&subpath.as_bytes(),
230-
].concat();
231-
232-
p_str = arca::path::normalize_path(io_bytes_to_str(&bytes)?);
233-
p_bytes = p_str.as_bytes().to_vec();
247+
return Ok((main.start(), Some((main.end() - main.start(), depth_n))));
234248
}
235249
}
236250
}
237251

238-
if let Some(m) = ZIP_RE.find(&p_bytes) {
239-
let mut idx = m.start();
240-
let mut next_char_idx;
241-
loop {
242-
next_char_idx = idx + 4;
243-
if p_bytes.get(next_char_idx) == Some(&b'/') {
244-
break;
245-
}
252+
Ok((p_bytes.len(), None))
253+
}
246254

247-
if idx == 0 || p_bytes.get(idx - 1) == Some(&b'/') {
248-
return Ok(VPath::Native(p.to_owned()))
249-
}
255+
pub fn vpath(p: &Path) -> std::io::Result<VPath> {
256+
let p_str = arca::path::normalize_path(
257+
p.as_os_str()
258+
.to_string_lossy()
259+
.to_string()
260+
);
261+
262+
let p_bytes = p_str
263+
.as_bytes().to_vec();
250264

251-
if let Some(next_m) = ZIP_RE.find_at(&p_bytes, next_char_idx) {
252-
idx = next_m.start();
253-
} else {
254-
break;
265+
let (archive_path_u8, zip_path_u8)
266+
= split_zip(&p_bytes);
267+
let (mut base_path_len, virtual_path_u8)
268+
= split_virtual(archive_path_u8)?;
269+
270+
let mut base_path_u8 = archive_path_u8;
271+
let mut virtual_segments = None;
272+
273+
if let Some((mut virtual_len, parent_depth)) = virtual_path_u8 {
274+
for _ in 0..parent_depth {
275+
base_path_len -= 1;
276+
virtual_len += 1;
277+
278+
while let Some(c) = archive_path_u8.get(base_path_len - 1) {
279+
if *c == b'/' {
280+
break;
281+
} else {
282+
base_path_len -= 1;
283+
virtual_len += 1;
284+
}
255285
}
256286
}
257287

258-
if p_bytes.len() > next_char_idx && p_bytes.get(next_char_idx) != Some(&b'/') {
259-
Ok(VPath::Native(PathBuf::from(p_str)))
288+
if let Some(c) = archive_path_u8.get(base_path_len - 1) {
289+
if *c != b'/' {
290+
return Err(std::io::Error::new(std::io::ErrorKind::Other, "Invalid virtual back-reference"))
291+
}
260292
} else {
261-
let zip_path = PathBuf::from(io_bytes_to_str(&p_bytes[0..next_char_idx])?);
293+
return Err(std::io::Error::new(std::io::ErrorKind::Other, "Invalid virtual back-reference"))
294+
}
262295

263-
let sub_path = if next_char_idx + 1 < p_bytes.len() {
264-
arca::path::normalize_path(io_bytes_to_str(&p_bytes[next_char_idx + 1..])?)
265-
} else {
266-
return Ok(VPath::Native(zip_path))
267-
};
296+
base_path_u8 = &archive_path_u8[0..base_path_len];
268297

269-
Ok(VPath::Zip(zip_path, sub_path))
270-
}
271-
} else {
272-
Ok(VPath::Native(PathBuf::from(p_str)))
298+
virtual_segments = Some((
299+
io_bytes_to_str(&archive_path_u8[base_path_len..archive_path_u8.len()])?.to_string(),
300+
io_bytes_to_str(&archive_path_u8[base_path_len + virtual_len..archive_path_u8.len()])?.to_string(),
301+
));
302+
} else if let None = zip_path_u8 {
303+
return Ok(VPath::Native(PathBuf::from(p_str)));
273304
}
305+
306+
Ok(VPath::Virtual(VPathInfo {
307+
base_path: io_bytes_to_str(base_path_u8)?.to_string(),
308+
virtual_segments,
309+
zip_path: zip_path_u8.map(|data| {
310+
io_bytes_to_str(data).map(|str| str.to_string())
311+
}).transpose()?,
312+
}))
274313
}
275314

276315
#[cfg(test)]
@@ -319,33 +358,69 @@ mod tests {
319358

320359
#[test]
321360
fn test_path_to_pnp() {
322-
let tests: Vec<(PathBuf, Option<VPath>)> = serde_json::from_str(r#"[
361+
let tests: Vec<(String, Option<VPath>)> = serde_json::from_str(r#"[
323362
[".zip", null],
324363
["foo", null],
325-
["foo.zip", "foo.zip"],
326-
["foo.zip/bar", ["foo.zip", "bar"]],
327-
["foo.zip/bar/baz", ["foo.zip", "bar/baz"]],
328-
["/a/b/c/foo.zip", "/a/b/c/foo.zip"],
329-
["./a/b/c/foo.zip", "a/b/c/foo.zip"],
330-
["./a/b/__virtual__/abcdef/0/c/d", "a/b/c/d"],
331-
["./a/b/__virtual__/abcdef/1/c/d", "a/c/d"],
332-
["./a/b/__virtual__/abcdef/0/c/foo.zip/bar", ["a/b/c/foo.zip", "bar"]],
333-
["./a/b/__virtual__/abcdef/1/c/foo.zip/bar", ["a/c/foo.zip", "bar"]],
364+
["foo.zip", null],
365+
["foo.zip/bar", {
366+
"basePath": "foo.zip/",
367+
"virtualSegments": null,
368+
"zipPath": "bar"
369+
}],
370+
["foo.zip/bar/baz", {
371+
"basePath": "foo.zip/",
372+
"virtualSegments": null,
373+
"zipPath": "bar/baz"
374+
}],
375+
["/a/b/c/foo.zip", null],
376+
["./a/b/c/foo.zip", null],
377+
["./a/b/__virtual__/abcdef/0/c/d", {
378+
"basePath": "a/b/",
379+
"virtualSegments": ["__virtual__/abcdef/0/c/d", "c/d"],
380+
"zipPath": null
381+
}],
382+
["./a/b/__virtual__/abcdef/1/c/d", {
383+
"basePath": "a/",
384+
"virtualSegments": ["b/__virtual__/abcdef/1/c/d", "c/d"],
385+
"zipPath": null
386+
}],
387+
["./a/b/__virtual__/abcdef/0/c/foo.zip/bar", {
388+
"basePath": "a/b/",
389+
"virtualSegments": ["__virtual__/abcdef/0/c/foo.zip/", "c/foo.zip/"],
390+
"zipPath": "bar"
391+
}],
392+
["./a/b/__virtual__/abcdef/1/c/foo.zip/bar", {
393+
"basePath": "a/",
394+
"virtualSegments": ["b/__virtual__/abcdef/1/c/foo.zip/", "c/foo.zip/"],
395+
"zipPath": "bar"
396+
}],
334397
["./a/b/c/.zip", null],
335398
["./a/b/c/foo.zipp", null],
336-
["./a/b/c/foo.zip/bar/baz/qux.zip", ["a/b/c/foo.zip", "bar/baz/qux.zip"]],
337-
["./a/b/c/foo.zip-bar.zip", "a/b/c/foo.zip-bar.zip"],
338-
["./a/b/c/foo.zip-bar.zip/bar/baz/qux.zip", ["a/b/c/foo.zip-bar.zip", "bar/baz/qux.zip"]],
339-
["./a/b/c/foo.zip-bar/foo.zip-bar/foo.zip-bar.zip/d", ["a/b/c/foo.zip-bar/foo.zip-bar/foo.zip-bar.zip", "d"]]
399+
["./a/b/c/foo.zip/bar/baz/qux.zip", {
400+
"basePath": "a/b/c/foo.zip/",
401+
"virtualSegments": null,
402+
"zipPath": "bar/baz/qux.zip"
403+
}],
404+
["./a/b/c/foo.zip-bar.zip", null],
405+
["./a/b/c/foo.zip-bar.zip/bar/baz/qux.zip", {
406+
"basePath": "a/b/c/foo.zip-bar.zip/",
407+
"virtualSegments": null,
408+
"zipPath": "bar/baz/qux.zip"
409+
}],
410+
["./a/b/c/foo.zip-bar/foo.zip-bar/foo.zip-bar.zip/d", {
411+
"basePath": "a/b/c/foo.zip-bar/foo.zip-bar/foo.zip-bar.zip/",
412+
"virtualSegments": null,
413+
"zipPath": "d"
414+
}]
340415
]"#).expect("Assertion failed: Expected the expectations to be loaded");
341416

342417
for (input, expected) in tests.iter() {
343418
let expectation: VPath = match expected {
344419
Some(p) => p.clone(),
345-
None => VPath::Native(input.clone()),
420+
None => VPath::Native(PathBuf::from(arca::path::normalize_path(input))),
346421
};
347422

348-
match vpath(input) {
423+
match vpath(&PathBuf::from(input)) {
349424
Ok(res) => {
350425
assert_eq!(res, expectation, "input='{:?}'", input);
351426
}

0 commit comments

Comments
 (0)