Merge pull request #34 from citreae535/main

Majored · web-flow · commit f533fa8363d5 · 2022-10-10T13:32:29.000+01:00
Add an example demonstrating how to safely extract ZIP files
diff --git a/Cargo.toml b/Cargo.toml
@@ -31,4 +31,5 @@ crc32fast = "1.3.2"
 thiserror = "1.0.37"
 
 [dev-dependencies]
+sanitize-filename = "0.4.0"
 tokio = { version = "1.21.2", features = ["full"] }
diff --git a/examples/file_extraction.rs b/examples/file_extraction.rs
@@ -0,0 +1,68 @@
+//! Demonstrates how to safely extract everything from a ZIP file.
+//! 
+//! Extracting ZIP files from untrusted sources without proper sanitization
+//! leaves the extractor open to directory traversal attacks.
+//! <https://en.wikipedia.org/wiki/Directory_traversal_attack#Archives>
+//! 
+//! This example tries to minimize that risk by following the implementation from
+//! Python's Standard Library.
+//! <https://docs.python.org/3/library/zipfile.html#zipfile.ZipFile.extract>
+//! <https://github.com/python/cpython/blob/ac0a19b62ae137c2c9f53fbba8ba3f769acf34dc/Lib/zipfile.py#L1662>
+//! 
+
+use std::{path::{Path, PathBuf}, env::current_dir};
+
+use async_zip::read::seek::ZipFileReader;
+use tokio::fs::{create_dir_all, File, OpenOptions};
+
+/// Opens `example.zip` and extracts it into the current working directory, panicking on failure.
+#[tokio::main]
+async fn main() {
+    let zip_file = File::open("example.zip").await.expect("Failed to open zip file");
+    unzip_file(zip_file, &current_dir().expect("Failed to get current working directory")).await;
+}
+
+/// Builds a `PathBuf` from an archive entry name, sanitizing it one component at a time.
+///
+/// Both `/` and `\` are treated as separators. Every component is passed through
+/// `sanitize_filename::sanitize`, which is relied on to strip reserved names, "." and "..".
+fn sanitize_file_path(path: &str) -> PathBuf {
+    path.split(|c: char| matches!(c, '/' | '\\'))
+        .map(sanitize_filename::sanitize)
+        .collect()
+}
+
+/// Extracts every entry of the ZIP archive into `out_dir`, sanitizing each entry's path first.
+///
+/// Panics on any read or write failure; existing files are never overwritten (`create_new`).
+async fn unzip_file(archive: File, out_dir: &Path) {
+    let mut reader = ZipFileReader::new(archive).await.expect("Failed to read zip file");
+    for index in 0..reader.entries().len() {
+        let entry = reader.entry_reader(index).await.expect("Failed to read ZipEntry");
+        let path = out_dir.join(sanitize_file_path(entry.entry().filename()));
+        // If the filename of the entry ends with '/', it is treated as a directory.
+        // This is implemented by previous versions of this crate and the Python Standard Library.
+        // https://docs.rs/async_zip/0.0.8/src/async_zip/read/mod.rs.html#63-65
+        // https://github.com/python/cpython/blob/820ef62833bd2d84a141adedd9a05998595d6b6d/Lib/zipfile.py#L528
+        let entry_is_dir = entry.entry().filename().ends_with('/');
+        if entry_is_dir {
+            // May already exist (out-of-order iteration); `create_dir_all` then succeeds as a no-op.
+            create_dir_all(&path).await.expect("Failed to create extracted directory");
+        } else {
+            // Creates parent directories. They may not exist if iteration is out of order
+            // or the archive does not contain directory entries.
+            let parent = path.parent().expect("A file entry should have parent directories");
+            if !parent.is_dir() {
+                create_dir_all(parent).await.expect("Failed to create parent directories");
+            }
+            let mut writer = OpenOptions::new()
+                .write(true)
+                .create_new(true)
+                .open(&path)
+                .await
+                .expect("Failed to create extracted file");
+            entry.copy_to_end_crc(&mut writer, 65536).await.expect("Failed to copy to extracted file");
+            // Close the file and adjust its metadata here if you wish to preserve the archive's metadata.
+        }
+    }
+}