Skip to content

Commit 9bbbc38

Browse files
correctly handle backslashes in entry names (i.e. don't)
1 parent f6f36d5 commit 9bbbc38

File tree

1 file changed

+16
-28
lines changed

1 file changed

+16
-28
lines changed

src/read/pipelining.rs

Lines changed: 16 additions & 28 deletions
Original file line numberDiff line numberDiff line change
@@ -17,40 +17,25 @@ pub mod path_splitting {
1717
DuplicateFileDirPath(String),
1818
}
1919

20-
fn split_by_separator(entry_path: &str) -> Result<impl Iterator<Item = &str>, PathSplitError> {
21-
/* FIXME: The ZIP spec states (APPNOTE 4.4.17) that file paths are in Unix format, and Unix
22-
* filesystems treat a backslash as a normal character. Thus they should be allowed on Unix
23-
* and replaced with \u{fffd} on Windows. */
24-
if entry_path.contains('\\') {
25-
if entry_path.contains('/') {
26-
return Err(PathSplitError::PathFormat(format!(
27-
"path {:?} contained both '\\' and '/' separators",
28-
entry_path
29-
)));
30-
}
31-
Ok(entry_path.split('\\'))
32-
} else {
33-
Ok(entry_path.split('/'))
34-
}
35-
}
36-
37-
/* TODO: consider using crate::unstable::path_to_string() for this--it involves new
38-
* allocations, but that really shouldn't matter for our purposes. I like the idea of using our
39-
* own logic here, since parallel/pipelined extraction is really a different use case than the
40-
* rest of the zip crate, but it's definitely worth considering. */
20+
/* NB: path_to_string() performs some of this logic, but is intended to coerce filesystems
21+
* paths into zip entry names, whereas we are processing entry names from a zip archive, so we
22+
* perform much less error handling. */
4123
pub(crate) fn normalize_parent_dirs<'a>(
4224
entry_path: &'a str,
4325
) -> Result<(Vec<&'a str>, bool), PathSplitError> {
44-
if entry_path.starts_with('/') || entry_path.starts_with('\\') {
26+
/* The ZIP spec states (APPNOTE 4.4.17) that file paths are in Unix format, and Unix
27+
* filesystems treat a backslash as a normal character. Thus they should be allowed on Unix
28+
* and replaced with \u{fffd} on Windows. */
29+
if entry_path.starts_with('/') {
4530
return Err(PathSplitError::PathFormat(format!(
46-
"path {:?} began with '/' or '\\' and is absolute",
31+
"path {:?} began with '/' and is absolute",
4732
entry_path
4833
)));
4934
}
5035
let is_dir = is_dir(entry_path);
5136

5237
let mut ret: Vec<&'a str> = Vec::new();
53-
for component in split_by_separator(entry_path)? {
38+
for component in entry_path.split('/') {
5439
match component {
5540
/* Skip over repeated separators "//". We check separately for ending '/' with the
5641
* `is_dir` variable. */
@@ -139,7 +124,7 @@ pub mod path_splitting {
139124
let mut cur_dir = &mut base_dir;
140125

141126
/* Split entries by directory components, and normalize any non-literal paths
142-
* (e.g. '..', '.', leading '/', repeated '/', similarly for windows '\\'). */
127+
* (e.g. '..', '.', leading '/', repeated '/'). */
143128
let (all_components, is_dir) = normalize_parent_dirs(entry_path)?;
144129
/* If the entry is a directory by mode, then it does not need to end in '/'. */
145130
let is_dir = is_dir || data.is_dir_by_mode();
@@ -211,10 +196,13 @@ pub mod path_splitting {
211196
);
212197
assert_eq!(normalize_parent_dirs("./a").unwrap(), (vec!["a"], false));
213198
assert_eq!(normalize_parent_dirs("a/../b/").unwrap(), (vec!["b"], true));
214-
assert_eq!(normalize_parent_dirs("a\\").unwrap(), (vec!["a"], true));
199+
assert_eq!(normalize_parent_dirs("a/").unwrap(), (vec!["a"], true));
215200
assert!(normalize_parent_dirs("/a").is_err());
216-
assert!(normalize_parent_dirs("\\a").is_err());
217-
assert!(normalize_parent_dirs("a\\b/").is_err());
201+
assert_eq!(normalize_parent_dirs("\\a").unwrap(), (vec!["\\a"], false));
202+
assert_eq!(
203+
normalize_parent_dirs("a\\b/").unwrap(),
204+
(vec!["a\\b"], true)
205+
);
218206
assert!(normalize_parent_dirs("a/../../b").is_err());
219207
assert!(normalize_parent_dirs("./").is_err());
220208
}

0 commit comments

Comments
 (0)