Skip to content

Commit 471137f

Browse files
authored
Deprecate unexpected ZIP compression methods (#17946)
1 parent 239c12c commit 471137f

File tree

15 files changed

+199
-41
lines changed

15 files changed

+199
-41
lines changed

Cargo.lock

Lines changed: 1 addition & 0 deletions
Some generated files are not rendered by default. Learn more about customizing how changed files appear on GitHub.

crates/uv-bin-install/src/lib.rs

Lines changed: 8 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -340,9 +340,14 @@ async fn download_and_unpack(
340340

341341
let id = reporter.on_download_start(binary.name(), version, size);
342342
let mut progress_reader = ProgressReader::new(reader, id, reporter);
343-
stream::archive(&mut progress_reader, format.into(), temp_dir.path())
344-
.await
345-
.map_err(|e| Error::Extract { source: e })?;
343+
stream::archive(
344+
download_url,
345+
&mut progress_reader,
346+
format.into(),
347+
temp_dir.path(),
348+
)
349+
.await
350+
.map_err(|e| Error::Extract { source: e })?;
346351
reporter.on_download_complete(id);
347352

348353
// Find the binary in the extracted files

crates/uv-dev/src/validate_zip.rs

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -47,7 +47,7 @@ pub(crate) async fn validate_zip(
4747

4848
let target = tempfile::TempDir::new()?;
4949

50-
uv_extract::stream::unzip(reader.compat(), target.path()).await?;
50+
uv_extract::stream::unzip(args.url.to_url(), reader.compat(), target.path()).await?;
5151

5252
Ok(())
5353
}

crates/uv-distribution/src/distribution_database.rs

Lines changed: 8 additions & 4 deletions
Original file line numberDiff line numberDiff line change
@@ -606,6 +606,8 @@ impl<'a, Context: BuildContext> DistributionDatabase<'a, Context> {
606606
// Create an entry for the HTTP cache.
607607
let http_entry = wheel_entry.with_file(format!("{}.http", filename.cache_key()));
608608

609+
let query_url = &url.clone();
610+
609611
let download = |response: reqwest::Response| {
610612
async {
611613
let size = size.or_else(|| content_length(&response));
@@ -634,7 +636,7 @@ impl<'a, Context: BuildContext> DistributionDatabase<'a, Context> {
634636
let mut reader = ProgressReader::new(&mut hasher, progress, &**reporter);
635637
match extension {
636638
WheelExtension::Whl => {
637-
uv_extract::stream::unzip(&mut reader, temp_dir.path())
639+
uv_extract::stream::unzip(query_url, &mut reader, temp_dir.path())
638640
.await
639641
.map_err(|err| Error::Extract(filename.to_string(), err))?;
640642
}
@@ -647,7 +649,7 @@ impl<'a, Context: BuildContext> DistributionDatabase<'a, Context> {
647649
}
648650
None => match extension {
649651
WheelExtension::Whl => {
650-
uv_extract::stream::unzip(&mut hasher, temp_dir.path())
652+
uv_extract::stream::unzip(query_url, &mut hasher, temp_dir.path())
651653
.await
652654
.map_err(|err| Error::Extract(filename.to_string(), err))?;
653655
}
@@ -777,6 +779,8 @@ impl<'a, Context: BuildContext> DistributionDatabase<'a, Context> {
777779
// Create an entry for the HTTP cache.
778780
let http_entry = wheel_entry.with_file(format!("{}.http", filename.cache_key()));
779781

782+
let query_url = &url.clone();
783+
780784
let download = |response: reqwest::Response| {
781785
async {
782786
let size = size.or_else(|| content_length(&response));
@@ -856,7 +860,7 @@ impl<'a, Context: BuildContext> DistributionDatabase<'a, Context> {
856860

857861
match extension {
858862
WheelExtension::Whl => {
859-
uv_extract::stream::unzip(&mut hasher, temp_dir.path())
863+
uv_extract::stream::unzip(query_url, &mut hasher, temp_dir.path())
860864
.await
861865
.map_err(|err| Error::Extract(filename.to_string(), err))?;
862866
}
@@ -1046,7 +1050,7 @@ impl<'a, Context: BuildContext> DistributionDatabase<'a, Context> {
10461050
// Unzip the wheel to a temporary directory.
10471051
match extension {
10481052
WheelExtension::Whl => {
1049-
uv_extract::stream::unzip(&mut hasher, temp_dir.path())
1053+
uv_extract::stream::unzip(path.display(), &mut hasher, temp_dir.path())
10501054
.await
10511055
.map_err(|err| Error::Extract(filename.to_string(), err))?;
10521056
}

crates/uv-distribution/src/source/mod.rs

Lines changed: 10 additions & 4 deletions
Original file line numberDiff line numberDiff line change
@@ -792,6 +792,8 @@ impl<'a, T: BuildContext> SourceDistributionBuilder<'a, T> {
792792
};
793793

794794
let download = |response| {
795+
let query_url = url.clone();
796+
795797
async {
796798
// At this point, we're seeing a new or updated source distribution. Initialize a
797799
// new revision, to collect the source and built artifacts.
@@ -802,7 +804,7 @@ impl<'a, T: BuildContext> SourceDistributionBuilder<'a, T> {
802804
let entry = cache_shard.shard(revision.id()).entry(SOURCE);
803805
let algorithms = hashes.algorithms();
804806
let hashes = self
805-
.download_archive(response, source, ext, entry.path(), &algorithms)
807+
.download_archive(query_url, response, source, ext, entry.path(), &algorithms)
806808
.await?;
807809

808810
Ok(revision.with_hashes(HashDigests::from(hashes)))
@@ -2242,6 +2244,8 @@ impl<'a, T: BuildContext> SourceDistributionBuilder<'a, T> {
22422244
};
22432245

22442246
let download = |response| {
2247+
let query_url = url.clone();
2248+
22452249
async {
22462250
// Take the union of the requested and existing hash algorithms.
22472251
let algorithms = {
@@ -2255,7 +2259,7 @@ impl<'a, T: BuildContext> SourceDistributionBuilder<'a, T> {
22552259
};
22562260

22572261
let hashes = self
2258-
.download_archive(response, source, ext, entry.path(), &algorithms)
2262+
.download_archive(query_url, response, source, ext, entry.path(), &algorithms)
22592263
.await?;
22602264
for existing in revision.hashes() {
22612265
if !hashes.contains(existing) {
@@ -2289,6 +2293,7 @@ impl<'a, T: BuildContext> SourceDistributionBuilder<'a, T> {
22892293
/// Download and unzip a source distribution into the cache from an HTTP response.
22902294
async fn download_archive(
22912295
&self,
2296+
query_url: DisplaySafeUrl,
22922297
response: Response,
22932298
source: &BuildableSource<'_>,
22942299
ext: SourceDistExtension,
@@ -2301,6 +2306,7 @@ impl<'a, T: BuildContext> SourceDistributionBuilder<'a, T> {
23012306
.bucket(CacheBucket::SourceDistributions),
23022307
)
23032308
.map_err(Error::CacheWrite)?;
2309+
23042310
let reader = response
23052311
.bytes_stream()
23062312
.map_err(std::io::Error::other)
@@ -2316,7 +2322,7 @@ impl<'a, T: BuildContext> SourceDistributionBuilder<'a, T> {
23162322

23172323
// Download and unzip the source distribution into a temporary directory.
23182324
let span = info_span!("download_source_dist", source_dist = %source);
2319-
uv_extract::stream::archive(&mut hasher, ext, temp_dir.path())
2325+
uv_extract::stream::archive(query_url, &mut hasher, ext, temp_dir.path())
23202326
.await
23212327
.map_err(|err| Error::Extract(source.to_string(), err))?;
23222328
drop(span);
@@ -2385,7 +2391,7 @@ impl<'a, T: BuildContext> SourceDistributionBuilder<'a, T> {
23852391
let mut hasher = uv_extract::hash::HashReader::new(reader, &mut hashers);
23862392

23872393
// Unzip the archive into a temporary directory.
2388-
uv_extract::stream::archive(&mut hasher, ext, &temp_dir.path())
2394+
uv_extract::stream::archive(path.display(), &mut hasher, ext, &temp_dir.path())
23892395
.await
23902396
.map_err(|err| Error::Extract(temp_dir.path().to_string_lossy().into_owned(), err))?;
23912397

crates/uv-extract/Cargo.toml

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -20,6 +20,7 @@ uv-configuration = { workspace = true }
2020
uv-distribution-filename = { workspace = true }
2121
uv-pypi-types = { workspace = true }
2222
uv-static = { workspace = true }
23+
uv-warnings = { workspace = true }
2324

2425
astral-tokio-tar = { workspace = true }
2526
async-compression = { workspace = true, features = ["bzip2", "gzip", "zstd", "xz"] }

crates/uv-extract/src/lib.rs

Lines changed: 59 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -1,4 +1,4 @@
1-
use std::sync::LazyLock;
1+
use std::{fmt::Display, sync::LazyLock};
22

33
pub use error::Error;
44
use regex::Regex;
@@ -14,6 +14,64 @@ mod vendor;
1414
static CONTROL_CHARACTERS_RE: LazyLock<Regex> = LazyLock::new(|| Regex::new(r"\p{C}").unwrap());
1515
static REPLACEMENT_CHARACTER: &str = "\u{FFFD}";
1616

17+
/// Compression methods that we consider supported.
18+
///
19+
/// Our underlying ZIP dependencies may support more.
20+
pub(crate) enum CompressionMethod {
21+
Stored,
22+
Deflated,
23+
Zstd,
24+
// NOTE: This will become `Unsupported(...)` in the future.
25+
Deprecated(&'static str),
26+
}
27+
28+
impl CompressionMethod {
29+
/// Returns `true` if this is a well-known compression method that we
30+
/// expect other ZIP implementations to support.
31+
pub(crate) fn is_well_known(&self) -> bool {
32+
matches!(self, Self::Stored | Self::Deflated | Self::Zstd)
33+
}
34+
}
35+
36+
impl Display for CompressionMethod {
37+
fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result {
38+
match self {
39+
Self::Stored => write!(f, "stored"),
40+
Self::Deflated => write!(f, "DEFLATE"),
41+
Self::Zstd => write!(f, "zstd"),
42+
Self::Deprecated(name) => write!(f, "{name}"),
43+
}
44+
}
45+
}
46+
47+
impl From<async_zip::Compression> for CompressionMethod {
48+
fn from(value: async_zip::Compression) -> Self {
49+
match value {
50+
async_zip::Compression::Stored => Self::Stored,
51+
async_zip::Compression::Deflate => Self::Deflated,
52+
async_zip::Compression::Zstd => Self::Zstd,
53+
async_zip::Compression::Bz => Self::Deprecated("bzip2"),
54+
async_zip::Compression::Lzma => Self::Deprecated("lzma"),
55+
async_zip::Compression::Xz => Self::Deprecated("xz"),
56+
_ => Self::Deprecated("unknown"),
57+
}
58+
}
59+
}
60+
61+
impl From<zip::CompressionMethod> for CompressionMethod {
62+
fn from(value: zip::CompressionMethod) -> Self {
63+
match value {
64+
zip::CompressionMethod::Stored => Self::Stored,
65+
zip::CompressionMethod::Deflated => Self::Deflated,
66+
zip::CompressionMethod::Zstd => Self::Zstd,
67+
zip::CompressionMethod::Bzip2 => Self::Deprecated("bzip2"),
68+
zip::CompressionMethod::Lzma => Self::Deprecated("lzma"),
69+
zip::CompressionMethod::Xz => Self::Deprecated("xz"),
70+
_ => Self::Deprecated("unknown"),
71+
}
72+
}
73+
}
74+
1775
/// Validate that a given filename (e.g. reported by a ZIP archive's
1876
/// local file entries or central directory entries) is "safe" to use.
1977
///

crates/uv-extract/src/stream.rs

Lines changed: 39 additions & 15 deletions
Original file line numberDiff line numberDiff line change
@@ -1,3 +1,4 @@
1+
use std::fmt::Display;
12
use std::path::{Component, Path, PathBuf};
23
use std::pin::Pin;
34

@@ -9,8 +10,9 @@ use tokio_util::compat::{FuturesAsyncReadCompatExt, TokioAsyncReadCompatExt};
910
use tracing::{debug, warn};
1011

1112
use uv_distribution_filename::SourceDistExtension;
13+
use uv_warnings::warn_user_once;
1214

13-
use crate::{Error, insecure_no_validate, validate_archive_member_name};
15+
use crate::{CompressionMethod, Error, insecure_no_validate, validate_archive_member_name};
1416

1517
const DEFAULT_BUF_SIZE: usize = 128 * 1024;
1618

@@ -43,7 +45,11 @@ struct ComputedEntry {
4345
/// This is useful for unzipping files as they're being downloaded. If the archive
4446
/// is already fully on disk, consider using `unzip_archive`, which can use multiple
4547
/// threads to work faster in that case.
46-
pub async fn unzip<R: tokio::io::AsyncRead + Unpin>(
48+
///
49+
/// `source_hint` is used for warning messages, to identify the source of the ZIP archive
50+
/// beneath the reader. It might be a URL, a file path, or something else.
51+
pub async fn unzip<D: Display, R: tokio::io::AsyncRead + Unpin>(
52+
source_hint: D,
4753
reader: R,
4854
target: impl AsRef<Path>,
4955
) -> Result<(), Error> {
@@ -79,8 +85,22 @@ pub async fn unzip<R: tokio::io::AsyncRead + Unpin>(
7985
let mut offset = 0;
8086

8187
while let Some(mut entry) = zip.next_with_entry().await? {
88+
let zip_entry = entry.reader().entry();
89+
90+
// Check for unexpected compression methods.
91+
// A future version of uv will reject instead of warning about these.
92+
let compression = CompressionMethod::from(zip_entry.compression());
93+
if !compression.is_well_known() {
94+
warn_user_once!(
95+
"One or more file entries in '{source_hint}' use the '{compression}' compression method, which is not widely supported. A future version of uv will reject ZIP archives containing entries compressed with this method. Entries must be compressed with the '{stored}', '{deflate}', or '{zstd}' compression methods.",
96+
stored = CompressionMethod::Stored,
97+
deflate = CompressionMethod::Deflated,
98+
zstd = CompressionMethod::Zstd,
99+
);
100+
}
101+
82102
// Construct the (expected) path to the file on-disk.
83-
let path = match entry.reader().entry().filename().as_str() {
103+
let path = match zip_entry.filename().as_str() {
84104
Ok(path) => path,
85105
Err(ZipError::StringNotUtf8) => return Err(Error::LocalHeaderNotUtf8 { offset }),
86106
Err(err) => return Err(err.into()),
@@ -107,14 +127,14 @@ pub async fn unzip<R: tokio::io::AsyncRead + Unpin>(
107127
continue;
108128
};
109129

110-
let file_offset = entry.reader().entry().file_offset();
111-
let expected_compressed_size = entry.reader().entry().compressed_size();
112-
let expected_uncompressed_size = entry.reader().entry().uncompressed_size();
113-
let expected_data_descriptor = entry.reader().entry().data_descriptor();
130+
let file_offset = zip_entry.file_offset();
131+
let expected_compressed_size = zip_entry.compressed_size();
132+
let expected_uncompressed_size = zip_entry.uncompressed_size();
133+
let expected_data_descriptor = zip_entry.data_descriptor();
114134

115135
// Either create the directory or write the file to disk.
116136
let path = target.join(&relpath);
117-
let is_dir = entry.reader().entry().dir()?;
137+
let is_dir = zip_entry.dir()?;
118138
let computed = if is_dir {
119139
if directories.insert(path.clone()) {
120140
fs_err::tokio::create_dir_all(path)
@@ -123,23 +143,23 @@ pub async fn unzip<R: tokio::io::AsyncRead + Unpin>(
123143
}
124144

125145
// If this is a directory, we expect the CRC32 to be 0.
126-
if entry.reader().entry().crc32() != 0 {
146+
if zip_entry.crc32() != 0 {
127147
if !skip_validation {
128148
return Err(Error::BadCrc32 {
129149
path: relpath.clone(),
130150
computed: 0,
131-
expected: entry.reader().entry().crc32(),
151+
expected: zip_entry.crc32(),
132152
});
133153
}
134154
}
135155

136156
// If this is a directory, we expect the uncompressed size to be 0.
137-
if entry.reader().entry().uncompressed_size() != 0 {
157+
if zip_entry.uncompressed_size() != 0 {
138158
if !skip_validation {
139159
return Err(Error::BadUncompressedSize {
140160
path: relpath.clone(),
141161
computed: 0,
142-
expected: entry.reader().entry().uncompressed_size(),
162+
expected: zip_entry.uncompressed_size(),
143163
});
144164
}
145165
}
@@ -164,7 +184,7 @@ pub async fn unzip<R: tokio::io::AsyncRead + Unpin>(
164184
{
165185
Ok(file) => {
166186
// Write the file to disk.
167-
let size = entry.reader().entry().uncompressed_size();
187+
let size = zip_entry.uncompressed_size();
168188
let mut writer = if let Ok(size) = usize::try_from(size) {
169189
tokio::io::BufWriter::with_capacity(std::cmp::min(size, 1024 * 1024), file)
170190
} else {
@@ -744,14 +764,18 @@ pub async fn untar<R: tokio::io::AsyncRead + Unpin>(
744764

745765
/// Unpack a `.zip`, `.tar.gz`, `.tar.bz2`, `.tar.zst`, or `.tar.xz` archive into the target directory,
746766
/// without requiring `Seek`.
747-
pub async fn archive<R: tokio::io::AsyncRead + Unpin>(
767+
///
768+
/// `source_hint` is used for warning messages, to identify the source of the archive
769+
/// beneath the reader. It might be a URL, a file path, or something else.
770+
pub async fn archive<D: Display, R: tokio::io::AsyncRead + Unpin>(
771+
source_hint: D,
748772
reader: R,
749773
ext: SourceDistExtension,
750774
target: impl AsRef<Path>,
751775
) -> Result<(), Error> {
752776
match ext {
753777
SourceDistExtension::Zip => {
754-
unzip(reader, target).await?;
778+
unzip(source_hint, reader, target).await?;
755779
}
756780
SourceDistExtension::Tar => {
757781
untar(reader, target).await?;

0 commit comments

Comments
 (0)