Skip to content

Commit 97ec274

Browse files
authored
Better PEP 639 from uv implementation (#32)
Since writing the initial implementation, the uv implementation got some improvements, which I'm merging back.
1 parent 1e7eb45 commit 97ec274

File tree

5 files changed

+116
-22
lines changed

5 files changed

+116
-22
lines changed

Cargo.lock

Lines changed: 1 addition & 1 deletion
Some generated files are not rendered by default. Learn more about customizing how changed files appear on GitHub.

Cargo.toml

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -1,6 +1,6 @@
11
[package]
22
name = "pyproject-toml"
3-
version = "0.13.4"
3+
version = "0.13.5"
44
description = "pyproject.toml parser in Rust"
55
edition = "2021"
66
license = "MIT"

Changelog.md

Lines changed: 5 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -1,5 +1,10 @@
11
# Changelog
22

3+
## 0.13.5
4+
5+
* Better PEP 639 `license-files` glob validation and error messages from uv
6+
* A public `check_pep639_glob` function for using the PEP 639 support with a different glob crate.
7+
38
## 0.13.4
49

510
* Update pep440_rs to 0.7.2

src/lib.rs

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -2,7 +2,7 @@
22
mod pep639_glob;
33

44
#[cfg(feature = "pep639-glob")]
5-
pub use pep639_glob::{parse_pep639_glob, Pep639GlobError};
5+
pub use pep639_glob::{check_pep639_glob, parse_pep639_glob, Pep639GlobError};
66

77
pub mod pep735_resolve;
88

src/pep639_glob.rs

Lines changed: 108 additions & 19 deletions
Original file line numberDiff line numberDiff line change
@@ -7,12 +7,26 @@ use thiserror::Error;
77
pub enum Pep639GlobError {
88
#[error(transparent)]
99
PatternError(#[from] PatternError),
10-
#[error("The parent directory operator (`..`) at position {pos} is not allowed in license file globs")]
11-
ParentDirectory { pos: usize },
12-
#[error("Glob contains invalid character at position {pos}: `{invalid}`")]
13-
InvalidCharacter { pos: usize, invalid: char },
14-
#[error("Glob contains invalid character in range at position {pos}: `{invalid}`")]
15-
InvalidCharacterRange { pos: usize, invalid: char },
10+
#[error(
11+
"The parent directory operator (`..`) at position {pos} is not allowed in glob: `{glob}`"
12+
)]
13+
ParentDirectory { glob: String, pos: usize },
14+
#[error("Invalid character `{invalid}` at position {pos} in glob: `{glob}`")]
15+
InvalidCharacter {
16+
glob: String,
17+
pos: usize,
18+
invalid: char,
19+
},
20+
#[error("Only forward slashes are allowed as path separator, invalid character at position {pos} in glob: `{glob}`")]
21+
InvalidBackslash { glob: String, pos: usize },
22+
#[error("Invalid character `{invalid}` in range at position {pos} in glob: `{glob}`")]
23+
InvalidCharacterRange {
24+
glob: String,
25+
pos: usize,
26+
invalid: char,
27+
},
28+
#[error("Too many at stars at position {pos} in glob: `{glob}`")]
29+
TooManyStars { glob: String, pos: usize },
1630
}
1731

1832
/// Parse a PEP 639 `license-files` glob
@@ -44,37 +58,91 @@ pub enum Pep639GlobError {
4458
/// > invalid. Projects MUST NOT use such values.
4559
/// > Tools consuming this field MAY reject invalid values with an error.
4660
pub fn parse_pep639_glob(glob: &str) -> Result<Pattern, Pep639GlobError> {
61+
check_pep639_glob(glob)?;
62+
Ok(Pattern::new(glob)?)
63+
}
64+
65+
/// Check if a glob pattern is valid according to PEP 639 rules.
66+
///
67+
/// See [parse_pep639_glob].
68+
pub fn check_pep639_glob(glob: &str) -> Result<(), Pep639GlobError> {
4769
let mut chars = glob.chars().enumerate().peekable();
4870
// A `..` is only a parent directory indicator at the start of the string or after a directory
4971
// separator.
5072
let mut start_or_slash = true;
5173
while let Some((pos, c)) = chars.next() {
52-
if c.is_alphanumeric() || matches!(c, '_' | '-' | '*' | '?') {
74+
// `***` or `**literals` can be correctly represented with fewer stars. They are banned by
75+
// `glob`, they are allowed by `globset` and PEP 639 is ambiguous, so we're filtering them
76+
// out.
77+
if c == '*' {
78+
let mut star_run = 1;
79+
while let Some((_, c)) = chars.peek() {
80+
if *c == '*' {
81+
star_run += 1;
82+
chars.next();
83+
} else {
84+
break;
85+
}
86+
}
87+
if star_run >= 3 {
88+
return Err(Pep639GlobError::TooManyStars {
89+
glob: glob.to_string(),
90+
// We don't update pos for the stars.
91+
pos,
92+
});
93+
} else if star_run == 2 {
94+
if let Some((_, c)) = chars.peek() {
95+
if *c != '/' {
96+
return Err(Pep639GlobError::TooManyStars {
97+
glob: glob.to_string(),
98+
// We don't update pos for the stars.
99+
pos,
100+
});
101+
}
102+
}
103+
}
104+
start_or_slash = false;
105+
} else if c.is_alphanumeric() || matches!(c, '_' | '-' | '?') {
53106
start_or_slash = false;
54107
} else if c == '.' {
55108
if start_or_slash && matches!(chars.peek(), Some((_, '.'))) {
56-
return Err(Pep639GlobError::ParentDirectory { pos });
109+
return Err(Pep639GlobError::ParentDirectory {
110+
pos,
111+
glob: glob.to_string(),
112+
});
57113
}
58114
start_or_slash = false;
59115
} else if c == '/' {
60116
start_or_slash = true;
61117
} else if c == '[' {
62118
for (pos, c) in chars.by_ref() {
63-
// TODO: https://discuss.python.org/t/pep-639-round-3-improving-license-clarity-with-better-package-metadata/53020/98
64119
if c.is_alphanumeric() || matches!(c, '_' | '-' | '.') {
65120
// Allowed.
66121
} else if c == ']' {
67122
break;
68123
} else {
69-
return Err(Pep639GlobError::InvalidCharacterRange { pos, invalid: c });
124+
return Err(Pep639GlobError::InvalidCharacterRange {
125+
glob: glob.to_string(),
126+
pos,
127+
invalid: c,
128+
});
70129
}
71130
}
72131
start_or_slash = false;
132+
} else if c == '\\' {
133+
return Err(Pep639GlobError::InvalidBackslash {
134+
glob: glob.to_string(),
135+
pos,
136+
});
73137
} else {
74-
return Err(Pep639GlobError::InvalidCharacter { pos, invalid: c });
138+
return Err(Pep639GlobError::InvalidCharacter {
139+
glob: glob.to_string(),
140+
pos,
141+
invalid: c,
142+
});
75143
}
76144
}
77-
Ok(Pattern::new(glob)?)
145+
Ok(())
78146
}
79147

80148
#[cfg(test)]
@@ -87,28 +155,48 @@ mod tests {
87155
let parse_err = |glob| parse_pep639_glob(glob).unwrap_err().to_string();
88156
assert_snapshot!(
89157
parse_err(".."),
90-
@"The parent directory operator (`..`) at position 0 is not allowed in license file globs"
158+
@"The parent directory operator (`..`) at position 0 is not allowed in glob: `..`"
91159
);
92160
assert_snapshot!(
93161
parse_err("licenses/.."),
94-
@"The parent directory operator (`..`) at position 9 is not allowed in license file globs"
162+
@"The parent directory operator (`..`) at position 9 is not allowed in glob: `licenses/..`"
95163
);
96164
assert_snapshot!(
97165
parse_err("licenses/LICEN!E.txt"),
98-
@"Glob contains invalid character at position 14: `!`"
166+
@"Invalid character `!` at position 14 in glob: `licenses/LICEN!E.txt`"
99167
);
100168
assert_snapshot!(
101169
parse_err("licenses/LICEN[!C]E.txt"),
102-
@"Glob contains invalid character in range at position 15: `!`"
170+
@"Invalid character `!` in range at position 15 in glob: `licenses/LICEN[!C]E.txt`"
103171
);
104172
assert_snapshot!(
105173
parse_err("licenses/LICEN[C?]E.txt"),
106-
@"Glob contains invalid character in range at position 16: `?`"
174+
@"Invalid character `?` in range at position 16 in glob: `licenses/LICEN[C?]E.txt`"
175+
);
176+
assert_snapshot!(
177+
parse_err("******"),
178+
@"Too many at stars at position 0 in glob: `******`"
179+
);
180+
assert_snapshot!(
181+
parse_err("licenses/**license"),
182+
@"Too many at stars at position 9 in glob: `licenses/**license`"
183+
);
184+
assert_snapshot!(
185+
parse_err("licenses/***/licenses.csv"),
186+
@"Too many at stars at position 9 in glob: `licenses/***/licenses.csv`"
107187
);
108-
assert_snapshot!(parse_err("******"), @"Pattern syntax error near position 2: wildcards are either regular `*` or recursive `**`");
109188
assert_snapshot!(
110189
parse_err(r"licenses\eula.txt"),
111-
@r"Glob contains invalid character at position 8: `\`"
190+
@r"Only forward slashes are allowed as path separator, invalid character at position 8 in glob: `licenses\eula.txt`"
191+
);
192+
assert_snapshot!(
193+
parse_err(r"**/@test"),
194+
@"Invalid character `@` at position 3 in glob: `**/@test`"
195+
);
196+
// Backslashes are not allowed either, but the space at position 6 is rejected first
197+
assert_snapshot!(
198+
parse_err(r"public domain/Gulliver\\'s Travels.txt"),
199+
@r"Invalid character ` ` at position 6 in glob: `public domain/Gulliver\\'s Travels.txt`"
112200
);
113201
}
114202

@@ -128,6 +216,7 @@ mod tests {
128216
"licenses/라이센스*.txt",
129217
"licenses/ライセンス*.txt",
130218
"licenses/执照*.txt",
219+
"src/**",
131220
];
132221
for case in cases {
133222
parse_pep639_glob(case).unwrap();

0 commit comments

Comments
 (0)