
Commit 5d69fc0

pipelined extraction
- initial sketch of lexicographic trie for pipelining (see the sketch after this list)
- move path splitting into a submodule
- lex trie can now propagate entry data
- outline handle allocation
- mostly handle files
- mostly handle dirs
- clarify symlink FIXMEs
- do symlink validation
- extract writable dir setting to helper method
- modify args to handle allocation method
- handle allocation test passes
- simplify perms a lot
- outline evaluation
- handle symlinks
- BIGGER CHANGE! add EntryReader/etc
- make initial pipelined extract work
- fix file perms by writing them after finishing the file write
- support directory entries by unix mode as well
- impl split extraction
- remove dependency on reader refactoring
- add dead_code to methods we don't use yet
- bzip2 support needed for benchmark test
- correctly handle backslashes in entry names (i.e. don't)
- make PathSplitError avoid consing a String until necessary
- add repro_old423 test for pipelining
- silence dead code warnings for windows
- avoid erroring for top-level directory entries
- use num_cpus by default for parallelism
- we spawn three threads per chunk
- add dynamically-generated test archive
- initialize the test archives exactly once in statics
- add benchmarks for dynamic and static test data
- use lazy_static
- add FIXME for follow-up work to support absolute paths
- impl From<DirEntry<...>> for FSEntry
- move handle_creation module to a separate file
- downgrade HandleCreationError to io::Error
- use ByAddress over ZipDataHandle
- replace unsafe transmutes with Pod methods
- add note about shared future dependency task DAG
- box each level of the b-tree together with its values. this may technically reduce heap fragmentation, but since this data structure only exists temporarily, that's probably not too important. instead, this change just reduces the amount of coercion and unboxing we need to do
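For orientation, here is a minimal standalone sketch of the kind of lexicographic path trie the bullets above describe: each level is a BTreeMap boxed together with its values, and per-entry data is propagated down to the leaves. The names DirNode, EntryData, and insert are illustrative only and do not correspond to the crate's internal types.

// Illustrative sketch only: `DirNode`, `EntryData`, and `insert` are
// hypothetical names, not the crate's internal types.
use std::collections::BTreeMap;

/// Per-entry payload propagated down to the leaves (placeholder field).
#[derive(Debug, Default)]
struct EntryData {
    uncompressed_size: u64,
}

/// One level of the lexicographic trie: file entries at this level plus
/// boxed child directories. Boxing each level together with its values
/// reduces the coercion/unboxing needed when the trie is walked.
#[derive(Debug, Default)]
struct DirNode {
    files: BTreeMap<String, EntryData>,
    dirs: BTreeMap<String, Box<DirNode>>,
}

impl DirNode {
    /// Insert an entry by its already-split path components.
    fn insert(&mut self, components: &[&str], data: EntryData) {
        match components {
            [] => {}
            [file] => {
                self.files.insert((*file).to_string(), data);
            }
            [dir, rest @ ..] => {
                self.dirs
                    .entry((*dir).to_string())
                    .or_default()
                    .insert(rest, data);
            }
        }
    }
}

fn main() {
    let mut root = DirNode::default();
    root.insert(&["a", "b", "c.txt"], EntryData { uncompressed_size: 3 });
    root.insert(&["a", "d.txt"], EntryData::default());
    println!("{root:#?}");
}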
1 parent e306cd0 commit 5d69fc0

3 files changed: +20 -20 lines changed

Cargo.toml

Lines changed: 6 additions & 2 deletions
@@ -28,20 +28,24 @@ time = { version = "0.3.37", default-features = false }
 
 [dependencies]
 aes = { version = "0.8", optional = true }
+by_address = { version = "1.2.1", optional = true }
 bzip2 = { version = "0.6.0", optional = true }
 chrono = { version = "^0.4.27", optional = true }
 constant_time_eq = { version = "0.3.1", optional = true }
 crc32fast = "1.4"
+displaydoc = "0.2.5"
 flate2 = { version = "1.1.1", default-features = false, optional = true }
 getrandom = { version = "0.3.1", features = ["std"], optional = true }
 hmac = { version = "0.12", optional = true, features = ["reset"] }
 indexmap = "2"
 jiff = { version = "0.2.4", optional = true }
 memchr = "2.7"
 nt-time = { version = "0.10.6", default-features = false, optional = true }
+num_cpus = { version = "1.16", optional = true }
 ppmd-rust = { version = "1.2", optional = true }
 pbkdf2 = { version = "0.12", optional = true }
 sha1 = { version = "0.10", optional = true }
+thiserror = "2"
 time = { workspace = true, optional = true, features = [
     "std",
 ] }
@@ -69,8 +73,7 @@ anyhow = "1.0.95"
 clap = { version = "=4.4.18", features = ["derive"] }
 tempdir = "0.3.7"
 tempfile = "3.15"
-lazy_static = "1.5"
-num_cpus = "1.16"
+num_cpus = "1"
 
 [features]
 aes-crypto = ["dep:aes", "dep:constant_time_eq", "hmac", "pbkdf2", "sha1", "getrandom", "zeroize"]
@@ -95,6 +98,7 @@ parallelism = ["libc", "num_cpus", "by_address"]
 xz = ["dep:lzma-rust2"]
 xz-static = ["lzma"]
 legacy-zip = ["bitstream-io"]
+parallelism = ["libc", "num_cpus", "by_address"]
 default = [
     "aes-crypto",
     "bzip2",

benches/extract.rs

Lines changed: 10 additions & 15 deletions
@@ -1,13 +1,12 @@
 use bencher::{benchmark_group, benchmark_main};
 
 use bencher::Bencher;
-use lazy_static::lazy_static;
 use tempdir::TempDir;
 use tempfile::tempfile;
 
 use std::fs;
 use std::path::Path;
-use std::sync::{Arc, Mutex};
+use std::sync::{LazyLock, Mutex};
 
 use zip::result::ZipResult;
 use zip::write::ZipWriter;
@@ -34,12 +33,10 @@ fn static_test_archive() -> ZipResult<ZipArchive<fs::File>> {
     ZipArchive::new(file)
 }
 
-lazy_static! {
-    static ref STATIC_TEST_ARCHIVE: Arc<Mutex<ZipArchive<fs::File>>> = {
-        let archive = static_test_archive().unwrap();
-        Arc::new(Mutex::new(archive))
-    };
-}
+static STATIC_TEST_ARCHIVE: LazyLock<Mutex<ZipArchive<fs::File>>> = LazyLock::new(|| {
+    let archive = static_test_archive().unwrap();
+    Mutex::new(archive)
+});
 
 /* This archive is generated dynamically, in order to scale with the number of reported CPUs.
  * - We want at least 768 files (4 per VCPU on EC2 *.48xlarge instances) to run in CI.
@@ -66,13 +63,11 @@ fn dynamic_test_archive(src_archive: &mut ZipArchive<fs::File>) -> ZipResult<Zip
     output_archive.finish_into_readable()
 }
 
-lazy_static! {
-    static ref DYNAMIC_TEST_ARCHIVE: Arc<Mutex<ZipArchive<fs::File>>> = {
-        let mut src = STATIC_TEST_ARCHIVE.lock().unwrap();
-        let archive = dynamic_test_archive(&mut src).unwrap();
-        Arc::new(Mutex::new(archive))
-    };
-}
+static DYNAMIC_TEST_ARCHIVE: LazyLock<Mutex<ZipArchive<fs::File>>> = LazyLock::new(|| {
+    let mut src = STATIC_TEST_ARCHIVE.lock().unwrap();
+    let archive = dynamic_test_archive(&mut src).unwrap();
+    Mutex::new(archive)
+});
 
 fn do_extract_basic(bench: &mut Bencher, archive: &mut ZipArchive<fs::File>) {
     let total_size: u64 = archive.decompressed_size().unwrap().try_into().unwrap();
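For context, std::sync::LazyLock (stable since Rust 1.80) provides the same once-only initialization that the lazy_static! macro did, without the extra dependency or the Arc wrapper, which is why the benchmark statics shrink as shown above. A minimal standalone sketch of the pattern, with build_fixture standing in for the benchmark's archive setup:

use std::sync::{LazyLock, Mutex};

// `build_fixture` is a stand-in for the benchmark's archive setup; it is
// not part of the real benchmark code.
fn build_fixture() -> Vec<u8> {
    vec![0u8; 1024]
}

// Initialized at most once, on first access, with no extra crate and no
// Arc wrapper needed.
static FIXTURE: LazyLock<Mutex<Vec<u8>>> = LazyLock::new(|| Mutex::new(build_fixture()));

fn main() {
    // The first lock() forces initialization; later calls reuse the value.
    let len = FIXTURE.lock().unwrap().len();
    println!("fixture holds {len} bytes");
}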

src/read/pipelining.rs

Lines changed: 4 additions & 3 deletions
@@ -662,15 +662,16 @@ pub mod split_extraction {
 
         for (entry, output_file) in compressed_receiver.iter() {
            /* Construct the decompressing reader. */
-            let limited_reader = ((&mut compressed_read_end)
-                as &mut dyn Read)
-                .take(entry.compressed_size);
+            let limited_reader =
+                (&mut compressed_read_end).take(entry.compressed_size);
             let crypto_reader =
                 make_crypto_reader(entry, limited_reader, None, None)?;
             let mut decompressing_reader = make_reader(
                 entry.compression_method,
+                entry.uncompressed_size,
                 entry.crc32,
                 crypto_reader,
+                entry.flags,
             )?;
             let mut limited_writer = TakeWrite::take(
                 uncompressed_write_end.by_ref(),
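The dropped cast works because io::Take is generic over any reader and &mut R implements Read whenever R does, so the trait-object indirection was never needed. A small standalone illustration using only std types; Cursor stands in for the pipeline's compressed stream, and this is not the crate's code:

use std::io::{Cursor, Read};

fn main() -> std::io::Result<()> {
    // Stand-in for the shared compressed stream; any `Read` type works.
    let mut compressed = Cursor::new(vec![1u8, 2, 3, 4, 5, 6]);

    // `&mut R` implements `Read` whenever `R` does, so `.take()` can be
    // called directly on the mutable borrow; no `&mut dyn Read` cast needed.
    let mut limited = (&mut compressed).take(4);

    let mut buf = Vec::new();
    limited.read_to_end(&mut buf)?;
    assert_eq!(buf, [1, 2, 3, 4]);

    // The underlying reader is still usable, positioned after the taken bytes.
    let mut rest = Vec::new();
    compressed.read_to_end(&mut rest)?;
    assert_eq!(rest, [5, 6]);
    Ok(())
}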
