@@ -149,8 +149,7 @@ impl<'s> ScriptSource<'s> {
149149 let mut rest = source. content ;
150150
151151 // Whitespace may precede a frontmatter but must end with a newline
152- const WHITESPACE : [ char ; 4 ] = [ ' ' , '\t' , '\r' , '\n' ] ;
153- let trimmed = rest. trim_start_matches ( WHITESPACE ) ;
152+ let trimmed = rest. trim_start_matches ( is_whitespace) ;
154153 if trimmed. len ( ) != rest. len ( ) {
155154 let trimmed_len = rest. len ( ) - trimmed. len ( ) ;
156155 let last_trimmed_index = trimmed_len - 1 ;
@@ -184,7 +183,7 @@ impl<'s> ScriptSource<'s> {
184183 anyhow:: bail!( "no closing `{fence_pattern}` found for frontmatter" ) ;
185184 } ;
186185 let ( info, rest) = rest. split_at ( info_end_index) ;
187- let info = info. trim_matches ( WHITESPACE ) ;
186+ let info = info. trim_matches ( is_whitespace ) ;
188187 if !info. is_empty ( ) {
189188 source. info = Some ( info) ;
190189 }
@@ -202,7 +201,7 @@ impl<'s> ScriptSource<'s> {
202201 let rest = & rest[ frontmatter_nl + nl_fence_pattern. len ( ) ..] ;
203202
204203 let ( after_closing_fence, rest) = rest. split_once ( "\n " ) . unwrap_or ( ( rest, "" ) ) ;
205- let after_closing_fence = after_closing_fence. trim_matches ( WHITESPACE ) ;
204+ let after_closing_fence = after_closing_fence. trim_matches ( is_whitespace ) ;
206205 if !after_closing_fence. is_empty ( ) {
207206 // extra characters beyond the original fence pattern, even if they are extra `-`
208207 anyhow:: bail!( "trailing characters found after frontmatter close" ) ;
@@ -256,6 +255,40 @@ fn strip_shebang(input: &str) -> Option<usize> {
256255 None
257256}
258257
/// Reports whether `c` counts as whitespace under the Rust language definition.
///
/// The accepted characters are those listed in the
/// [Rust language reference](https://doc.rust-lang.org/reference/whitespace.html).
///
/// Mirrors `is_whitespace` in rust-lang/rust's `compiler/rustc_lexer/src/lib.rs`.
fn is_whitespace(c: char) -> bool {
    // The accepted set is Unicode's `Pattern_White_Space`. That property is
    // stable across Unicode versions, so hard-coding the code points is safe.
    // Adjacent code points are folded into inclusive ranges.
    matches!(
        c,
        // ASCII controls: \t, \n, vertical tab, form feed, \r
        '\u{0009}'..='\u{000D}'
            // ASCII space
            | '\u{0020}'
            // NEXT LINE (from Latin-1)
            | '\u{0085}'
            // Bidi markers: LEFT-TO-RIGHT MARK, RIGHT-TO-LEFT MARK
            | '\u{200E}'..='\u{200F}'
            // Dedicated Unicode separators: LINE SEPARATOR, PARAGRAPH SEPARATOR
            | '\u{2028}'..='\u{2029}'
    )
}
291+
259292#[ cfg( test) ]
260293mod test_expand {
261294 use snapbox:: assert_data_eq;
0 commit comments