Skip to content

Commit 6f0da97

Browse files
committed
fix(lexer): Only allow horizontal whitespace in frontmatter
While writing up the reference for frontmatter, I realized that we probably shouldn't accept Unicode line-ending characters between the code fence and the infostring, or trailing after the infostring or a code fence. Digging into the Unicode specification we use for whitespace, I found that it divides whitespace into categories, so I'm deferring to its definition of horizontal whitespace for what may be used within a line. Note that I am leaving out support for Unicode Default Ignorable characters; I figure that can be discussed outside of this change, within the reference and the tracking issue.
1 parent 428e413 commit 6f0da97

File tree

2 files changed

+21
-6
lines changed

2 files changed

+21
-6
lines changed

compiler/rustc_lexer/src/lib.rs

Lines changed: 18 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -350,6 +350,21 @@ pub fn is_whitespace(c: char) -> bool {
350350
)
351351
}
352352

353+
/// True if `c` is considered horizontal whitespace according to Rust language definition.
///
/// Only tab (U+0009) and space (U+0020) are accepted; every other character,
/// including line endings such as `\n`, yields `false`.
354+
pub fn is_horizontal_whitespace(c: char) -> bool {
355+
// This is only the horizontal-space subset of Pattern_White_Space, not the full set.
356+
//
357+
// Note that this set is stable (ie, it doesn't change with different
358+
// Unicode versions), so it's ok to just hard-code the values.
359+
360+
matches!(
361+
c,
362+
// Horizontal space characters
363+
'\u{0009}' // tab (\t)
364+
| '\u{0020}' // space
365+
)
366+
}
367+
353368
/// True if `c` is valid as a first character of an identifier.
354369
/// See [Rust language reference](https://doc.rust-lang.org/reference/identifiers.html) for
355370
/// a formal definition of valid identifier name.
@@ -536,7 +551,7 @@ impl Cursor<'_> {
536551
debug_assert!(length_opening >= 3);
537552

538553
// whitespace between the opening and the infostring.
539-
self.eat_while(|ch| ch != '\n' && is_whitespace(ch));
554+
self.eat_while(|ch| ch != '\n' && is_horizontal_whitespace(ch));
540555

541556
// copied from `eat_identifier`, but allows `-` and `.` in infostring to allow something like
542557
// `---Cargo.toml` as a valid opener
@@ -545,7 +560,7 @@ impl Cursor<'_> {
545560
self.eat_while(|c| is_id_continue(c) || c == '-' || c == '.');
546561
}
547562

548-
self.eat_while(|ch| ch != '\n' && is_whitespace(ch));
563+
self.eat_while(|ch| ch != '\n' && is_horizontal_whitespace(ch));
549564
let invalid_infostring = self.first() != '\n';
550565

551566
let mut found = false;
@@ -586,7 +601,7 @@ impl Cursor<'_> {
586601
// on a standalone line. Might be wrong.
587602
while let Some(closing) = rest.find("---") {
588603
let preceding_chars_start = rest[..closing].rfind("\n").map_or(0, |i| i + 1);
589-
if rest[preceding_chars_start..closing].chars().all(is_whitespace) {
604+
if rest[preceding_chars_start..closing].chars().all(is_horizontal_whitespace) {
590605
// candidate found
591606
potential_closing = Some(closing);
592607
break;

compiler/rustc_parse/src/lexer/mod.rs

Lines changed: 3 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -6,7 +6,7 @@ use rustc_ast::util::unicode::{TEXT_FLOW_CONTROL_CHARS, contains_text_flow_contr
66
use rustc_errors::codes::*;
77
use rustc_errors::{Applicability, Diag, DiagCtxtHandle, StashKey};
88
use rustc_lexer::{
9-
Base, Cursor, DocStyle, FrontmatterAllowed, LiteralKind, RawStrError, is_whitespace,
9+
Base, Cursor, DocStyle, FrontmatterAllowed, LiteralKind, RawStrError, is_horizontal_whitespace,
1010
};
1111
use rustc_literal_escaper::{EscapeError, Mode, check_for_errors};
1212
use rustc_session::lint::BuiltinLintDiag;
@@ -597,7 +597,7 @@ impl<'psess, 'src> Lexer<'psess, 'src> {
597597

598598
let last_line_start = within.rfind('\n').map_or(0, |i| i + 1);
599599
let last_line = &within[last_line_start..];
600-
let last_line_trimmed = last_line.trim_start_matches(is_whitespace);
600+
let last_line_trimmed = last_line.trim_start_matches(is_horizontal_whitespace);
601601
let last_line_start_pos = frontmatter_opening_end_pos + BytePos(last_line_start as u32);
602602

603603
let frontmatter_span = self.mk_sp(frontmatter_opening_pos, self.pos);
@@ -640,7 +640,7 @@ impl<'psess, 'src> Lexer<'psess, 'src> {
640640
});
641641
}
642642

643-
if !rest.trim_matches(is_whitespace).is_empty() {
643+
if !rest.trim_matches(is_horizontal_whitespace).is_empty() {
644644
let span = self.mk_sp(last_line_start_pos, self.pos);
645645
self.dcx().emit_err(errors::FrontmatterExtraCharactersAfterClose { span });
646646
}

0 commit comments

Comments
 (0)