@@ -7,12 +7,26 @@ use thiserror::Error;
77pub enum Pep639GlobError {
88 #[ error( transparent) ]
99 PatternError ( #[ from] PatternError ) ,
10- #[ error( "The parent directory operator (`..`) at position {pos} is not allowed in license file globs" ) ]
11- ParentDirectory { pos : usize } ,
12- #[ error( "Glob contains invalid character at position {pos}: `{invalid}`" ) ]
13- InvalidCharacter { pos : usize , invalid : char } ,
14- #[ error( "Glob contains invalid character in range at position {pos}: `{invalid}`" ) ]
15- InvalidCharacterRange { pos : usize , invalid : char } ,
10+ #[ error(
11+ "The parent directory operator (`..`) at position {pos} is not allowed in glob: `{glob}`"
12+ ) ]
13+ ParentDirectory { glob : String , pos : usize } ,
14+ #[ error( "Invalid character `{invalid}` at position {pos} in glob: `{glob}`" ) ]
15+ InvalidCharacter {
16+ glob : String ,
17+ pos : usize ,
18+ invalid : char ,
19+ } ,
20+ #[ error( "Only forward slashes are allowed as path separator, invalid character at position {pos} in glob: `{glob}`" ) ]
21+ InvalidBackslash { glob : String , pos : usize } ,
22+ #[ error( "Invalid character `{invalid}` in range at position {pos} in glob: `{glob}`" ) ]
23+ InvalidCharacterRange {
24+ glob : String ,
25+ pos : usize ,
26+ invalid : char ,
27+ } ,
28+ #[ error( "Too many at stars at position {pos} in glob: `{glob}`" ) ]
29+ TooManyStars { glob : String , pos : usize } ,
1630}
1731
1832/// Parse a PEP 639 `license-files` glob
@@ -44,37 +58,91 @@ pub enum Pep639GlobError {
4458/// > invalid. Projects MUST NOT use such values.
4559/// > Tools consuming this field MAY reject invalid values with an error.
4660pub fn parse_pep639_glob ( glob : & str ) -> Result < Pattern , Pep639GlobError > {
61+ check_pep639_glob ( glob) ?;
62+ Ok ( Pattern :: new ( glob) ?)
63+ }
64+
65+ /// Check if a glob pattern is valid according to PEP 639 rules.
66+ ///
67+ /// See [parse_pep639_glob].
68+ pub fn check_pep639_glob ( glob : & str ) -> Result < ( ) , Pep639GlobError > {
4769 let mut chars = glob. chars ( ) . enumerate ( ) . peekable ( ) ;
4870 // A `..` is on a parent directory indicator at the start of the string or after a directory
4971 // separator.
5072 let mut start_or_slash = true ;
5173 while let Some ( ( pos, c) ) = chars. next ( ) {
52- if c. is_alphanumeric ( ) || matches ! ( c, '_' | '-' | '*' | '?' ) {
74+ // `***` or `**literals` can be correctly represented with less stars. They are banned by
75+ // `glob`, they are allowed by `globset` and PEP 639 is ambiguous, so we're filtering them
76+ // out.
77+ if c == '*' {
78+ let mut star_run = 1 ;
79+ while let Some ( ( _, c) ) = chars. peek ( ) {
80+ if * c == '*' {
81+ star_run += 1 ;
82+ chars. next ( ) ;
83+ } else {
84+ break ;
85+ }
86+ }
87+ if star_run >= 3 {
88+ return Err ( Pep639GlobError :: TooManyStars {
89+ glob : glob. to_string ( ) ,
90+ // We don't update pos for the stars.
91+ pos,
92+ } ) ;
93+ } else if star_run == 2 {
94+ if let Some ( ( _, c) ) = chars. peek ( ) {
95+ if * c != '/' {
96+ return Err ( Pep639GlobError :: TooManyStars {
97+ glob : glob. to_string ( ) ,
98+ // We don't update pos for the stars.
99+ pos,
100+ } ) ;
101+ }
102+ }
103+ }
104+ start_or_slash = false ;
105+ } else if c. is_alphanumeric ( ) || matches ! ( c, '_' | '-' | '?' ) {
53106 start_or_slash = false ;
54107 } else if c == '.' {
55108 if start_or_slash && matches ! ( chars. peek( ) , Some ( ( _, '.' ) ) ) {
56- return Err ( Pep639GlobError :: ParentDirectory { pos } ) ;
109+ return Err ( Pep639GlobError :: ParentDirectory {
110+ pos,
111+ glob : glob. to_string ( ) ,
112+ } ) ;
57113 }
58114 start_or_slash = false ;
59115 } else if c == '/' {
60116 start_or_slash = true ;
61117 } else if c == '[' {
62118 for ( pos, c) in chars. by_ref ( ) {
63- // TODO: https://discuss.python.org/t/pep-639-round-3-improving-license-clarity-with-better-package-metadata/53020/98
64119 if c. is_alphanumeric ( ) || matches ! ( c, '_' | '-' | '.' ) {
65120 // Allowed.
66121 } else if c == ']' {
67122 break ;
68123 } else {
69- return Err ( Pep639GlobError :: InvalidCharacterRange { pos, invalid : c } ) ;
124+ return Err ( Pep639GlobError :: InvalidCharacterRange {
125+ glob : glob. to_string ( ) ,
126+ pos,
127+ invalid : c,
128+ } ) ;
70129 }
71130 }
72131 start_or_slash = false ;
132+ } else if c == '\\' {
133+ return Err ( Pep639GlobError :: InvalidBackslash {
134+ glob : glob. to_string ( ) ,
135+ pos,
136+ } ) ;
73137 } else {
74- return Err ( Pep639GlobError :: InvalidCharacter { pos, invalid : c } ) ;
138+ return Err ( Pep639GlobError :: InvalidCharacter {
139+ glob : glob. to_string ( ) ,
140+ pos,
141+ invalid : c,
142+ } ) ;
75143 }
76144 }
77- Ok ( Pattern :: new ( glob ) ? )
145+ Ok ( ( ) )
78146}
79147
80148#[ cfg( test) ]
@@ -87,28 +155,48 @@ mod tests {
87155 let parse_err = |glob| parse_pep639_glob ( glob) . unwrap_err ( ) . to_string ( ) ;
88156 assert_snapshot ! (
89157 parse_err( ".." ) ,
90- @"The parent directory operator (`..`) at position 0 is not allowed in license file globs "
158+ @"The parent directory operator (`..`) at position 0 is not allowed in glob: `..` "
91159 ) ;
92160 assert_snapshot ! (
93161 parse_err( "licenses/.." ) ,
94- @"The parent directory operator (`..`) at position 9 is not allowed in license file globs "
162+ @"The parent directory operator (`..`) at position 9 is not allowed in glob: `licenses/..` "
95163 ) ;
96164 assert_snapshot ! (
97165 parse_err( "licenses/LICEN!E.txt" ) ,
98- @"Glob contains invalid character at position 14: `! `"
166+ @"Invalid character `!` at position 14 in glob : `licenses/LICEN!E.txt `"
99167 ) ;
100168 assert_snapshot ! (
101169 parse_err( "licenses/LICEN[!C]E.txt" ) ,
102- @"Glob contains invalid character in range at position 15: `! `"
170+ @"Invalid character `!` in range at position 15 in glob : `licenses/LICEN[!C]E.txt `"
103171 ) ;
104172 assert_snapshot ! (
105173 parse_err( "licenses/LICEN[C?]E.txt" ) ,
106- @"Glob contains invalid character in range at position 16: `?`"
174+ @"Invalid character `?` in range at position 16 in glob: `licenses/LICEN[C?]E.txt`"
175+ ) ;
176+ assert_snapshot ! (
177+ parse_err( "******" ) ,
178+ @"Too many at stars at position 0 in glob: `******`"
179+ ) ;
180+ assert_snapshot ! (
181+ parse_err( "licenses/**license" ) ,
182+ @"Too many at stars at position 9 in glob: `licenses/**license`"
183+ ) ;
184+ assert_snapshot ! (
185+ parse_err( "licenses/***/licenses.csv" ) ,
186+ @"Too many at stars at position 9 in glob: `licenses/***/licenses.csv`"
107187 ) ;
108- assert_snapshot ! ( parse_err( "******" ) , @"Pattern syntax error near position 2: wildcards are either regular `*` or recursive `**`" ) ;
109188 assert_snapshot ! (
110189 parse_err( r"licenses\eula.txt" ) ,
111- @r"Glob contains invalid character at position 8: `\`"
190+ @r"Only forward slashes are allowed as path separator, invalid character at position 8 in glob: `licenses\eula.txt`"
191+ ) ;
192+ assert_snapshot ! (
193+ parse_err( r"**/@test" ) ,
194+ @"Invalid character `@` at position 3 in glob: `**/@test`"
195+ ) ;
197+ // Spaces are not allowed: the space at position 6 is reported before the backslashes are reached
197+ assert_snapshot ! (
198+ parse_err( r"public domain/Gulliver\\'s Travels.txt" ) ,
199+ @r"Invalid character ` ` at position 6 in glob: `public domain/Gulliver\\'s Travels.txt`"
112200 ) ;
113201 }
114202
@@ -128,6 +216,7 @@ mod tests {
128216 "licenses/라이센스*.txt" ,
129217 "licenses/ライセンス*.txt" ,
130218 "licenses/执照*.txt" ,
219+ "src/**" ,
131220 ] ;
132221 for case in cases {
133222 parse_pep639_glob ( case) . unwrap ( ) ;
0 commit comments