Skip to content

Commit 5ac5142

Browse files
committed
Add file extraction example
1 parent 8e49ab1 commit 5ac5142

File tree

2 files changed

+69
-0
lines changed

2 files changed

+69
-0
lines changed

Cargo.toml

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -31,4 +31,5 @@ crc32fast = "1.3.2"
3131
thiserror = "1.0.37"
3232

3333
[dev-dependencies]
34+
sanitize-filename = "0.4.0"
3435
tokio = { version = "1.21.2", features = ["full"] }

examples/file_extraction.rs

Lines changed: 68 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,68 @@
1+
//! Demonstrates how to safely extract everything from a ZIP file.
2+
//!
3+
//! Extracting zip files from untrusted sources without proper sanitization
4+
//! could be exploited by directory traversal attacks.
5+
//! <https://en.wikipedia.org/wiki/Directory_traversal_attack#Archives>
6+
//!
7+
//! This example tries to minimize that risk by following the implementation from
8+
//! Python's Standard Library.
9+
//! <https://docs.python.org/3/library/zipfile.html#zipfile.ZipFile.extract>
10+
//! <https://github.com/python/cpython/blob/ac0a19b62ae137c2c9f53fbba8ba3f769acf34dc/Lib/zipfile.py#L1662>
11+
//!
12+
13+
use std::{path::{Path, PathBuf}, env::current_dir};
14+
15+
use async_zip::read::seek::ZipFileReader;
16+
use tokio::fs::{create_dir_all, File, OpenOptions};
17+
18+
#[tokio::main]
19+
async fn main() {
20+
let archive = File::open("example.zip").await.expect("Failed to open zip file");
21+
let out_dir = current_dir().expect("Failed to get current working directory");
22+
unzip_file(archive, &out_dir).await;
23+
}
24+
25+
/// Returns a relative path without reserved names, redundant separators, ".", or "..".
26+
fn sanitize_file_path(path: &str) -> PathBuf {
27+
// Replaces backwards slashes
28+
path.replace('\\', "/")
29+
// Sanitizes each component
30+
.split('/')
31+
.map(sanitize_filename::sanitize)
32+
.collect()
33+
}
34+
35+
/// Extracts everything from the ZIP archive to the output directory
36+
async fn unzip_file(archive: File, out_dir: &Path) {
37+
let mut reader = ZipFileReader::new(archive).await.expect("Failed to read zip file");
38+
for index in 0..reader.entries().len() {
39+
let entry = reader.entry_reader(index).await.expect("Failed to read ZipEntry");
40+
let path = out_dir.join(sanitize_file_path(entry.entry().filename()));
41+
// If the filename of the entry ends with '/', it is treated as a directory.
42+
// This is implemented by previous versions of this crate and the Python Standard Library.
43+
// https://docs.rs/async_zip/0.0.8/src/async_zip/read/mod.rs.html#63-65
44+
// https://github.com/python/cpython/blob/820ef62833bd2d84a141adedd9a05998595d6b6d/Lib/zipfile.py#L528
45+
let entry_is_dir = entry.entry().filename().ends_with('/');
46+
47+
// The directory may have been created if iteration is out of order.
48+
if entry_is_dir && !path.exists() {
49+
create_dir_all(&path).await.expect("Failed to create extracted directory");
50+
} else {
51+
// Creates parent directories. They may not exist if iteration is out of order
52+
// or the archive does not contain directory entries.
53+
let parent = path.parent().expect("A file entry should have parent directories");
54+
if !parent.is_dir() {
55+
create_dir_all(parent).await.expect("Failed to create parent directories");
56+
}
57+
let mut writer = OpenOptions::new()
58+
.write(true)
59+
.create_new(true)
60+
.open(&path)
61+
.await
62+
.expect("Failed to create extracted file");
63+
entry.copy_to_end_crc(&mut writer, 65536).await.expect("Failed to copy to extracted file");
64+
65+
// Closes the file and manipulates its metadata here if you wish to preserve its metadata from the archive.
66+
}
67+
}
68+
}

0 commit comments

Comments
 (0)