|
| 1 | +use tokio::io::AsyncReadExt; |
| 2 | +use zip::result::ZipError; |
| 3 | + |
| 4 | +use crate::error::DownloadError; |
| 5 | +use std::{ |
| 6 | + io::{self, Read}, |
| 7 | + path::Path, |
| 8 | +}; |
| 9 | + |
/// Archive/compression formats that can be recognised from a file's leading bytes.
///
/// `Zip` is extracted as a ZIP archive by `extract_archive`; every other variant
/// names the compression codec of a stream that `extract_archive` unpacks as TAR.
///
/// The type is a plain field-less enum, so it is cheap to copy and can be
/// compared directly (useful for matching and for tests of format detection).
#[derive(Debug, Clone, Copy, PartialEq, Eq)]
enum ArchiveFormat {
    /// ZIP archive.
    Zip,
    /// Gzip-compressed stream.
    Gz,
    /// XZ-compressed stream.
    Xz,
    /// Bzip2-compressed stream.
    Bz2,
    /// Zstandard-compressed stream.
    Zst,
}
| 18 | + |
// File signatures ("magic bytes") matched against the start of a file by
// `detect_archive_format`. Printable bytes are spelled as ASCII so the familiar
// mnemonics (`PK`, `7zXZ`, `BZh`) are visible; the rest stay as hex escapes.

/// Leading bytes that identify a ZIP file.
const ZIP_MAGIC_BYTES: [u8; 4] = *b"PK\x03\x04";
/// Leading bytes that identify a gzip stream.
const GZIP_MAGIC_BYTES: [u8; 2] = *b"\x1F\x8B";
/// Leading bytes that identify an XZ stream.
const XZ_MAGIC_BYTES: [u8; 6] = *b"\xFD7zXZ\x00";
/// Leading bytes that identify a bzip2 stream.
const BZIP2_MAGIC_BYTES: [u8; 3] = *b"BZh";
/// Leading bytes that identify a Zstandard frame.
const ZSTD_MAGIC_BYTES: [u8; 4] = *b"\x28\xB5\x2F\xFD";
| 24 | + |
| 25 | +/// Extracts the contents of an archive file to a directory. |
| 26 | +/// |
| 27 | +/// This function automatically detects the archive format based on file signatures, |
| 28 | +/// then extracts its contents to a directory named after the archive file. |
| 29 | +/// |
| 30 | +/// # Arguments |
| 31 | +/// * `path` - Path to the archive file to be extracted |
| 32 | +/// * `output_dir` - Path where contents should be extracted |
| 33 | +/// |
| 34 | +/// # Returns |
| 35 | +/// * `Ok(())` if extraction was successful |
| 36 | +/// * `Err(DownloadError)` if an error occurred during extraction |
| 37 | +pub async fn extract_archive<P: AsRef<Path>>(path: P, output_dir: P) -> Result<(), DownloadError> { |
| 38 | + let path = path.as_ref(); |
| 39 | + let output_dir = output_dir.as_ref(); |
| 40 | + let mut file = tokio::fs::File::open(path).await?; |
| 41 | + let mut magic = vec![0u8; 6]; |
| 42 | + let n = file.read(&mut magic).await?; |
| 43 | + let magic = &magic[..n]; |
| 44 | + |
| 45 | + let Some(format) = detect_archive_format(magic) else { |
| 46 | + return Ok(()); |
| 47 | + }; |
| 48 | + |
| 49 | + match format { |
| 50 | + ArchiveFormat::Zip => extract_zip(path, &output_dir) |
| 51 | + .await |
| 52 | + .map_err(|err| DownloadError::ZipError(err)), |
| 53 | + ArchiveFormat::Gz => extract_tar(path, &output_dir, flate2::read::GzDecoder::new).await, |
| 54 | + ArchiveFormat::Xz => extract_tar(path, &output_dir, xz2::read::XzDecoder::new).await, |
| 55 | + ArchiveFormat::Bz2 => extract_tar(path, &output_dir, bzip2::read::BzDecoder::new).await, |
| 56 | + ArchiveFormat::Zst => { |
| 57 | + extract_tar(path, &output_dir, |f| { |
| 58 | + zstd::stream::read::Decoder::new(f).unwrap() |
| 59 | + }) |
| 60 | + .await |
| 61 | + } |
| 62 | + } |
| 63 | +} |
| 64 | + |
/// Returns `true` when `data` begins with the bytes in `pattern`.
///
/// Never panics: a `data` slice shorter than `pattern` simply yields `false`,
/// and an empty `pattern` matches any input.
fn starts_with(data: &[u8], pattern: &[u8]) -> bool {
    data.starts_with(pattern)
}
| 69 | + |
| 70 | +/// Detects the archive format by examining the file's magic bytes (signature). |
| 71 | +/// |
| 72 | +/// # Arguments |
| 73 | +/// * `magic` - Byte slice containing the beginning of the file (typically first 512 bytes) |
| 74 | +/// |
| 75 | +/// # Returns |
| 76 | +/// * `Some(ArchiveFormat)` - The detected archive format |
| 77 | +/// * `None` - If the format could not be recognized |
| 78 | +fn detect_archive_format(magic: &[u8]) -> Option<ArchiveFormat> { |
| 79 | + if starts_with(magic, &ZIP_MAGIC_BYTES) { |
| 80 | + return Some(ArchiveFormat::Zip); |
| 81 | + } |
| 82 | + |
| 83 | + if starts_with(magic, &GZIP_MAGIC_BYTES) { |
| 84 | + return Some(ArchiveFormat::Gz); |
| 85 | + } |
| 86 | + |
| 87 | + if starts_with(magic, &XZ_MAGIC_BYTES) { |
| 88 | + return Some(ArchiveFormat::Xz); |
| 89 | + } |
| 90 | + |
| 91 | + if starts_with(magic, &BZIP2_MAGIC_BYTES) { |
| 92 | + return Some(ArchiveFormat::Bz2); |
| 93 | + } |
| 94 | + |
| 95 | + if starts_with(magic, &ZSTD_MAGIC_BYTES) { |
| 96 | + return Some(ArchiveFormat::Zst); |
| 97 | + } |
| 98 | + |
| 99 | + None |
| 100 | +} |
| 101 | + |
| 102 | +/// Generic function for extracting TAR-based archives with different compression formats. |
| 103 | +/// |
| 104 | +/// This function handles the common extraction logic for all TAR-based formats by |
| 105 | +/// accepting a decompression function that converts the compressed stream to a |
| 106 | +/// readable stream. |
| 107 | +/// |
| 108 | +/// # Arguments |
| 109 | +/// * `path` - Path to the archive file |
| 110 | +/// * `output_dir` - Path where contents should be extracted |
| 111 | +/// * `decompress` - Function that takes a file and returns a decompressed reader |
| 112 | +/// |
| 113 | +/// # Returns |
| 114 | +/// * `Ok(())` if extraction was successful |
| 115 | +/// * `Err(DownloadError)` if an error occurred |
| 116 | +async fn extract_tar<F, R>( |
| 117 | + path: &Path, |
| 118 | + output_dir: &Path, |
| 119 | + decompress: F, |
| 120 | +) -> Result<(), DownloadError> |
| 121 | +where |
| 122 | + F: FnOnce(std::fs::File) -> R + Send + 'static, |
| 123 | + R: Read + Send + 'static, |
| 124 | +{ |
| 125 | + let path = path.to_path_buf(); |
| 126 | + let output_dir = output_dir.to_path_buf(); |
| 127 | + |
| 128 | + let file = std::fs::File::open(&path)?; |
| 129 | + let decompressed = decompress(file); |
| 130 | + let mut archive = tar::Archive::new(decompressed); |
| 131 | + archive.unpack(&output_dir)?; |
| 132 | + |
| 133 | + Ok(()) |
| 134 | +} |
| 135 | + |
| 136 | +/// Extracts a ZIP archive to the specified output directory. |
| 137 | +/// |
| 138 | +/// # Arguments |
| 139 | +/// * `path` - Path to the ZIP archive |
| 140 | +/// * `output_dir` - Directory where the contents should be extracted |
| 141 | +/// |
| 142 | +/// # Returns |
| 143 | +/// * `Ok(())` if extraction was successful |
| 144 | +/// * `Err(DownloadError)` if an error occurred |
| 145 | +async fn extract_zip(path: &Path, output_dir: &Path) -> Result<(), ZipError> { |
| 146 | + let path = path.to_path_buf(); |
| 147 | + let output_dir = output_dir.to_path_buf(); |
| 148 | + |
| 149 | + let file = std::fs::File::open(&path)?; |
| 150 | + let mut archive = zip::ZipArchive::new(file)?; |
| 151 | + |
| 152 | + for i in 0..archive.len() { |
| 153 | + let mut file = archive.by_index(i)?; |
| 154 | + let out_path = output_dir.join(file.name()); |
| 155 | + |
| 156 | + if file.name().ends_with('/') { |
| 157 | + std::fs::create_dir_all(&out_path)?; |
| 158 | + } else { |
| 159 | + if let Some(p) = out_path.parent() { |
| 160 | + if !p.exists() { |
| 161 | + std::fs::create_dir_all(p)?; |
| 162 | + } |
| 163 | + } |
| 164 | + let mut out_file = std::fs::File::create(&out_path)?; |
| 165 | + io::copy(&mut file, &mut out_file)?; |
| 166 | + } |
| 167 | + } |
| 168 | + Ok(()) |
| 169 | +} |
0 commit comments