Skip to content

Commit de2cb07

Browse files
committed
Implement wheel metadata parsing
1 parent 57d0358 commit de2cb07

File tree

7 files changed

+180
-29
lines changed

7 files changed

+180
-29
lines changed

Cargo.toml

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -12,5 +12,6 @@ repository = "https://github.com/messense/python-pkginfo-rs"
1212
# See more keys and their definitions at https://doc.rust-lang.org/cargo/reference/manifest.html
1313

1414
[dependencies]
15+
fs-err = "2.6.0"
1516
mailparse = "0.13.4"
1617
zip = "0.5.12"

src/distribution.rs

Lines changed: 94 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,94 @@
1+
use std::io::{BufReader, Read};
2+
use std::path::Path;
3+
4+
use zip::ZipArchive;
5+
6+
use crate::{Error, Metadata};
7+
8+
/// Kind of Python distribution archive, as detected by `Distribution::new`
/// from the file extension.
#[derive(Debug, Clone, Copy, PartialEq, Eq, Hash)]
pub enum DistributionType {
    /// Source distribution (`zip` or `gz` extension)
    SDist,
    /// Built distribution (`egg` extension)
    BDist,
    /// Wheel (`whl` extension)
    Wheel,
}
14+
15+
/// Archive format of a source distribution, chosen from the file extension
/// in `Distribution::new` and handed to `parse_sdist`.
#[derive(Debug, Clone, Copy)]
16+
enum SDistType {
17+
/// Selected for the `zip` extension
Zip,
18+
/// Selected for the `gz` extension
TarGz,
19+
}
20+
21+
/// A parsed distribution: the detected distribution type together with the
/// metadata read from the archive.
#[derive(Debug, Clone)]
22+
pub struct Distribution {
23+
// Type detected from the file extension in `Distribution::new`.
dist_type: DistributionType,
24+
// Metadata returned by the type-specific parser.
metadata: Metadata,
25+
}
26+
27+
impl Distribution {
28+
/// Open and parse a distribution from `path`
29+
pub fn new(path: impl AsRef<Path>) -> Result<Self, Error> {
30+
let path = path.as_ref();
31+
if let Some(ext) = path.extension().and_then(|ext| ext.to_str()) {
32+
let dist_type = match ext {
33+
"zip" | "gz" => DistributionType::SDist,
34+
"egg" => DistributionType::BDist,
35+
"whl" => DistributionType::Wheel,
36+
_ => return Err(Error::UnknownDistributionType),
37+
};
38+
let metadata = match dist_type {
39+
DistributionType::SDist => {
40+
let sdist_type = match ext {
41+
"zip" => SDistType::Zip,
42+
"gz" => SDistType::TarGz,
43+
_ => return Err(Error::UnknownDistributionType),
44+
};
45+
Self::parse_sdist(path, sdist_type)
46+
}
47+
DistributionType::BDist => Self::parse_bdist(path),
48+
DistributionType::Wheel => Self::parse_wheel(path),
49+
}?;
50+
return Ok(Self {
51+
dist_type,
52+
metadata,
53+
});
54+
}
55+
Err(Error::UnknownDistributionType)
56+
}
57+
58+
/// Returns distribution type
59+
pub fn r#type(&self) -> DistributionType {
60+
self.dist_type
61+
}
62+
63+
/// Returns distribution metadata
64+
pub fn metadata(&self) -> &Metadata {
65+
&self.metadata
66+
}
67+
68+
fn parse_sdist(path: &Path, sdist_type: SDistType) -> Result<Metadata, Error> {
69+
todo!()
70+
}
71+
72+
fn parse_bdist(path: &Path) -> Result<Metadata, Error> {
73+
todo!()
74+
}
75+
76+
fn parse_wheel(path: &Path) -> Result<Metadata, Error> {
77+
let reader = BufReader::new(fs_err::File::open(path)?);
78+
let mut archive = ZipArchive::new(reader)?;
79+
let metadata_files: Vec<_> = archive
80+
.file_names()
81+
.filter(|name| name.ends_with(".dist-info/METADATA"))
82+
.map(ToString::to_string)
83+
.collect();
84+
match metadata_files.as_slice() {
85+
[] => Err(Error::MetadataNotFound),
86+
[metadata_file] => {
87+
let mut buf = Vec::new();
88+
archive.by_name(metadata_file)?.read_to_end(&mut buf)?;
89+
Metadata::parse(&buf)
90+
}
91+
_ => Err(Error::MultipleMetadataFiles(metadata_files)),
92+
}
93+
}
94+
}

src/error.rs

Lines changed: 33 additions & 8 deletions
Original file line numberDiff line numberDiff line change
@@ -1,46 +1,71 @@
11
use std::{error, fmt, io};
22

33
use mailparse::MailParseError;
4+
use zip::result::ZipError;
45

56
/// The error type
67
#[derive(Debug)]
78
pub enum Error {
9+
/// I/O error
10+
Io(io::Error),
811
/// mail parse error
912
MailParse(MailParseError),
13+
/// Zip parse error
14+
Zip(ZipError),
1015
/// Metadata field not found
1116
FieldNotFound(&'static str),
12-
/// I/O error
13-
Io(io::Error),
17+
/// Unknown distribution type
18+
UnknownDistributionType,
19+
/// Metadata file not found
20+
MetadataNotFound,
21+
/// Multiple metadata files found
22+
MultipleMetadataFiles(Vec<String>),
1423
}
1524

1625
impl fmt::Display for Error {
1726
fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result {
1827
match self {
28+
Error::Io(err) => err.fmt(f),
1929
Error::MailParse(err) => err.fmt(f),
30+
Error::Zip(err) => err.fmt(f),
2031
Error::FieldNotFound(key) => write!(f, "metadata field {} not found", key),
21-
Error::Io(err) => err.fmt(f),
32+
Error::UnknownDistributionType => write!(f, "unknown distribution type"),
33+
Error::MetadataNotFound => write!(f, "metadata file not found"),
34+
Error::MultipleMetadataFiles(files) => {
35+
write!(f, "found multiple metadata files: {:?}", files)
36+
}
2237
}
2338
}
2439
}
2540

2641
impl error::Error for Error {
2742
fn source(&self) -> Option<&(dyn error::Error + 'static)> {
2843
match self {
29-
Error::MailParse(err) => Some(err),
30-
Error::FieldNotFound(_) => None,
3144
Error::Io(err) => Some(err),
45+
Error::MailParse(err) => Some(err),
46+
Error::Zip(err) => Some(err),
47+
Error::FieldNotFound(_)
48+
| Error::UnknownDistributionType
49+
| Error::MetadataNotFound
50+
| Error::MultipleMetadataFiles(_) => None,
3251
}
3352
}
3453
}
3554

55+
impl From<io::Error> for Error {
56+
fn from(err: io::Error) -> Self {
57+
Self::Io(err)
58+
}
59+
}
60+
3661
impl From<MailParseError> for Error {
3762
fn from(err: MailParseError) -> Self {
3863
Self::MailParse(err)
3964
}
4065
}
4166

42-
impl From<io::Error> for Error {
43-
fn from(err: io::Error) -> Self {
44-
Self::Io(err)
67+
impl From<ZipError> for Error {
68+
fn from(err: ZipError) -> Self {
69+
Self::Zip(err)
4570
}
4671
}

src/lib.rs

Lines changed: 2 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -1,5 +1,7 @@
1+
mod distribution;
12
mod error;
23
mod metadata;
34

5+
pub use crate::distribution::{Distribution, DistributionType};
46
pub use crate::error::Error;
57
pub use crate::metadata::Metadata;

src/metadata.rs

Lines changed: 38 additions & 21 deletions
Original file line numberDiff line numberDiff line change
@@ -73,6 +73,23 @@ impl Metadata {
7373
pub fn parse(content: &[u8]) -> Result<Self, Error> {
7474
let msg = mailparse::parse_mail(content)?;
7575
let headers = msg.get_headers();
76+
let get_first_value = |name| {
77+
headers.get_first_value(name).and_then(|value| {
78+
if value == "UNKNOWN" {
79+
None
80+
} else {
81+
Some(value)
82+
}
83+
})
84+
};
85+
let get_all_values = |name| {
86+
let values: Vec<String> = headers
87+
.get_all_values(name)
88+
.into_iter()
89+
.filter(|value| value != "UNKNOWN")
90+
.collect();
91+
values
92+
};
7693
let metadata_version = headers
7794
.get_first_value("Metadata-Version")
7895
.ok_or_else(|| Error::FieldNotFound("Metadata-Version"))?;
@@ -82,32 +99,32 @@ impl Metadata {
8299
let version = headers
83100
.get_first_value("Version")
84101
.ok_or_else(|| Error::FieldNotFound("Version"))?;
85-
let platforms = headers.get_all_values("Platform");
86-
let supported_platforms = headers.get_all_values("Supported-Platform");
87-
let summary = headers.get_first_value("Summary");
102+
let platforms = get_all_values("Platform");
103+
let supported_platforms = get_all_values("Supported-Platform");
104+
let summary = get_first_value("Summary");
88105
let body = msg.get_body()?;
89106
let description = if !body.trim().is_empty() {
90107
Some(body)
91108
} else {
92-
headers.get_first_value("Description")
109+
get_first_value("Description")
93110
};
94-
let keywords = headers.get_first_value("Keywords");
95-
let home_page = headers.get_first_value("Home-Page");
96-
let download_url = headers.get_first_value("Download-URL");
97-
let author = headers.get_first_value("Author");
98-
let author_email = headers.get_first_value("Author-email");
99-
let license = headers.get_first_value("License");
100-
let classifiers = headers.get_all_values("Classifier");
101-
let requires_dist = headers.get_all_values("Requires-Dist");
102-
let provides_dist = headers.get_all_values("Provides-Dist");
103-
let obsoletes_dist = headers.get_all_values("Obsoletes-Dist");
104-
let maintainer = headers.get_first_value("Maintainer");
105-
let maintainer_email = headers.get_first_value("Maintainer-email");
106-
let requires_python = headers.get_first_value("Requires-Python");
107-
let requires_external = headers.get_all_values("Requires-External");
108-
let project_urls = headers.get_all_values("Project-URL");
109-
let provides_extras = headers.get_all_values("Provides-Extra");
110-
let description_content_type = headers.get_first_value("Description-Content-Type");
111+
let keywords = get_first_value("Keywords");
112+
let home_page = get_first_value("Home-Page");
113+
let download_url = get_first_value("Download-URL");
114+
let author = get_first_value("Author");
115+
let author_email = get_first_value("Author-email");
116+
let license = get_first_value("License");
117+
let classifiers = get_all_values("Classifier");
118+
let requires_dist = get_all_values("Requires-Dist");
119+
let provides_dist = get_all_values("Provides-Dist");
120+
let obsoletes_dist = get_all_values("Obsoletes-Dist");
121+
let maintainer = get_first_value("Maintainer");
122+
let maintainer_email = get_first_value("Maintainer-email");
123+
let requires_python = get_first_value("Requires-Python");
124+
let requires_external = get_all_values("Requires-External");
125+
let project_urls = get_all_values("Project-URL");
126+
let provides_extras = get_all_values("Provides-Extra");
127+
let description_content_type = get_first_value("Description-Content-Type");
111128
Ok(Metadata {
112129
metadata_version,
113130
name,
13.8 KB
Binary file not shown.

tests/test_distribution.rs

Lines changed: 12 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,12 @@
1+
use python_pkginfo::{Distribution, DistributionType};
2+
3+
// Opens the `build` 0.4.0 wheel fixture through `Distribution::new` and checks
// the detected distribution type plus a few parsed metadata fields.
#[test]
4+
fn test_parse_wheel() {
5+
let dist = Distribution::new("tests/fixtures/build-0.4.0-py2.py3-none-any.whl").unwrap();
6+
assert_eq!(dist.r#type(), DistributionType::Wheel);
7+
let metadata = dist.metadata();
8+
assert_eq!(metadata.metadata_version, "2.1");
9+
assert_eq!(metadata.name, "build");
10+
// NOTE(review): presumably these fields are absent or `UNKNOWN` in the
// fixture's METADATA; confirm against the fixture.
assert!(metadata.home_page.is_none());
11+
assert!(metadata.download_url.is_none());
12+
}

0 commit comments

Comments
 (0)