|
| 1 | +//! Demonstrates how to safely extract everything from a ZIP file. |
| 2 | +//! |
| 3 | +//! Extracting ZIP files from untrusted sources without proper sanitization |
| 4 | +//! leaves the extractor open to directory traversal attacks. |
| 5 | +//! <https://en.wikipedia.org/wiki/Directory_traversal_attack#Archives> |
| 6 | +//! |
| 7 | +//! This example tries to minimize that risk by following the implementation from |
| 8 | +//! Python's Standard Library. |
| 9 | +//! <https://docs.python.org/3/library/zipfile.html#zipfile.ZipFile.extract> |
| 10 | +//! <https://github.com/python/cpython/blob/ac0a19b62ae137c2c9f53fbba8ba3f769acf34dc/Lib/zipfile.py#L1662> |
| 11 | +//! |
| 12 | +
|
| 13 | +use std::{path::{Path, PathBuf}, env::current_dir}; |
| 14 | + |
| 15 | +use async_zip::read::seek::ZipFileReader; |
| 16 | +use tokio::fs::{create_dir_all, File, OpenOptions}; |
| 17 | + |
| 18 | +#[tokio::main] |
| 19 | +async fn main() { |
| 20 | + let archive = File::open("example.zip").await.expect("Failed to open zip file"); |
| 21 | + let out_dir = current_dir().expect("Failed to get current working directory"); |
| 22 | + unzip_file(archive, &out_dir).await; |
| 23 | +} |
| 24 | + |
| 25 | +/// Returns a relative path without reserved names, redundant separators, ".", or "..". |
| 26 | +fn sanitize_file_path(path: &str) -> PathBuf { |
| 27 | + // Replaces backwards slashes |
| 28 | + path.replace('\\', "/") |
| 29 | + // Sanitizes each component |
| 30 | + .split('/') |
| 31 | + .map(sanitize_filename::sanitize) |
| 32 | + .collect() |
| 33 | +} |
| 34 | + |
| 35 | +/// Extracts everything from the ZIP archive to the output directory |
| 36 | +async fn unzip_file(archive: File, out_dir: &Path) { |
| 37 | + let mut reader = ZipFileReader::new(archive).await.expect("Failed to read zip file"); |
| 38 | + for index in 0..reader.entries().len() { |
| 39 | + let entry = reader.entry_reader(index).await.expect("Failed to read ZipEntry"); |
| 40 | + let path = out_dir.join(sanitize_file_path(entry.entry().filename())); |
| 41 | + // If the filename of the entry ends with '/', it is treated as a directory. |
| 42 | + // This is implemented by previous versions of this crate and the Python Standard Library. |
| 43 | + // https://docs.rs/async_zip/0.0.8/src/async_zip/read/mod.rs.html#63-65 |
| 44 | + // https://github.com/python/cpython/blob/820ef62833bd2d84a141adedd9a05998595d6b6d/Lib/zipfile.py#L528 |
| 45 | + let entry_is_dir = entry.entry().filename().ends_with('/'); |
| 46 | + |
| 47 | + // The directory may have been created if iteration is out of order. |
| 48 | + if entry_is_dir && !path.exists() { |
| 49 | + create_dir_all(&path).await.expect("Failed to create extracted directory"); |
| 50 | + } else { |
| 51 | + // Creates parent directories. They may not exist if iteration is out of order |
| 52 | + // or the archive does not contain directory entries. |
| 53 | + let parent = path.parent().expect("A file entry should have parent directories"); |
| 54 | + if !parent.is_dir() { |
| 55 | + create_dir_all(parent).await.expect("Failed to create parent directories"); |
| 56 | + } |
| 57 | + let mut writer = OpenOptions::new() |
| 58 | + .write(true) |
| 59 | + .create_new(true) |
| 60 | + .open(&path) |
| 61 | + .await |
| 62 | + .expect("Failed to create extracted file"); |
| 63 | + entry.copy_to_end_crc(&mut writer, 65536).await.expect("Failed to copy to extracted file"); |
| 64 | + |
| 65 | + // Closes the file and manipulates its metadata here if you wish to preserve its metadata from the archive. |
| 66 | + } |
| 67 | + } |
| 68 | +} |
0 commit comments