Skip to content

Commit 8679628

Browse files
committed
Lex lifetimes using emoji and emit appropriate error
Lex and parse emoji in lifetimes, and disallow them in the parser with a hard error. Allow emoji to start a lifetime name even if they are not XID_Start.

```
error: lifetimes cannot contain emoji
  --> $DIR/emoji-in-lifetime.rs:1:22
   |
LL | fn bad_lifetime_name<'🐛🐛🐛family👨👩👧👦>(
   |                      ^^^^^^^^^^^^^^^^^^^^^
```
1 parent 35f1109 commit 8679628

File tree

4 files changed

+50
-5
lines changed

4 files changed

+50
-5
lines changed

compiler/rustc_lexer/src/lib.rs

Lines changed: 16 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -141,6 +141,7 @@ pub enum TokenKind {
141141
/// A lifetime, e.g. `'a`.
142142
Lifetime {
143143
starts_with_number: bool,
144+
has_emoji: bool,
144145
},
145146

146147
/// `;`
@@ -975,14 +976,20 @@ impl<'a> Cursor<'a> {
975976
fn lifetime_or_char(&mut self) -> TokenKind {
976977
debug_assert!(self.prev() == '\'');
977978

979+
let mut has_emoji = false;
978980
let can_be_a_lifetime = if self.second() == '\'' {
979981
// It's surely not a lifetime.
980982
false
981983
} else {
982984
// If the first symbol is valid for identifier, it can be a lifetime.
983985
// Also check if it's a number for a better error reporting (so '0 will
984986
// be reported as invalid lifetime and not as unterminated char literal).
985-
is_id_start(self.first()) || self.first().is_ascii_digit()
987+
let c = self.first();
988+
let is_emoji = !c.is_ascii() && c.is_emoji_char();
989+
if is_emoji {
990+
has_emoji = true;
991+
}
992+
is_id_start(c) || c.is_ascii_digit() || is_emoji
986993
};
987994

988995
if !can_be_a_lifetime {
@@ -1012,7 +1019,13 @@ impl<'a> Cursor<'a> {
10121019
// First symbol can be a number (which isn't a valid identifier start),
10131020
// so skip it without any checks.
10141021
self.bump();
1015-
self.eat_while(is_id_continue);
1022+
self.eat_while(|c| {
1023+
let is_emoji = !c.is_ascii() && c.is_emoji_char();
1024+
if is_emoji {
1025+
has_emoji = true;
1026+
}
1027+
is_id_continue(c) || is_emoji
1028+
});
10161029

10171030
match self.first() {
10181031
// Check if after skipping literal contents we've met a closing
@@ -1024,7 +1037,7 @@ impl<'a> Cursor<'a> {
10241037
Literal { kind, suffix_start: self.pos_within_token() }
10251038
}
10261039
'#' if !starts_with_number => UnknownPrefixLifetime,
1027-
_ => Lifetime { starts_with_number },
1040+
_ => Lifetime { starts_with_number, has_emoji },
10281041
}
10291042
}
10301043

compiler/rustc_parse/src/lexer/mod.rs

Lines changed: 5 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -316,19 +316,22 @@ impl<'psess, 'src> Lexer<'psess, 'src> {
316316
self.lint_literal_unicode_text_flow(symbol, kind, self.mk_sp(start, self.pos), "literal");
317317
token::Literal(token::Lit { kind, symbol, suffix })
318318
}
319-
rustc_lexer::TokenKind::Lifetime { starts_with_number } => {
319+
rustc_lexer::TokenKind::Lifetime { starts_with_number, has_emoji } => {
320320
// Include the leading `'` in the real identifier, for macro
321321
// expansion purposes. See #12512 for the gory details of why
322322
// this is necessary.
323323
let lifetime_name = nfc_normalize(self.str_from(start));
324324
self.last_lifetime = Some(self.mk_sp(start, start + BytePos(1)));
325+
let span = self.mk_sp(start, self.pos);
325326
if starts_with_number {
326-
let span = self.mk_sp(start, self.pos);
327327
self.dcx()
328328
.struct_err("lifetimes cannot start with a number")
329329
.with_span(span)
330330
.stash(span, StashKey::LifetimeIsChar);
331331
}
332+
if has_emoji {
333+
self.dcx().struct_span_err(span, "lifetimes cannot contain emoji").emit();
334+
}
332335
token::Lifetime(lifetime_name, IdentIsRaw::No)
333336
}
334337
rustc_lexer::TokenKind::RawLifetime => {
Lines changed: 9 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,9 @@
1+
// #141081
2+
fn bad_lifetime_name<'🐛🐛🐛family👨‍👩‍👧‍👦>(_: &'🐛🐛🐛family👨‍👩‍👧‍👦 ()) {}
3+
//~^ ERROR: lifetimes cannot contain emoji
4+
//~| ERROR: lifetimes cannot contain emoji
5+
fn main() {
6+
'🐛: { //~ ERROR: lifetimes cannot contain emoji
7+
todo!();
8+
};
9+
}
Lines changed: 20 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,20 @@
1+
error: lifetimes cannot contain emoji
2+
--> $DIR/emoji-in-lifetime.rs:2:22
3+
|
4+
LL | fn bad_lifetime_name<'🐛🐛🐛family👨👩👧👦>(_: &'🐛🐛🐛family👨👩👧👦 ()) {}
5+
| ^^^^^^^^^^^^^^^^^^^^^
6+
7+
error: lifetimes cannot contain emoji
8+
--> $DIR/emoji-in-lifetime.rs:2:45
9+
|
10+
LL | fn bad_lifetime_name<'🐛🐛🐛family👨👩👧👦>(_: &'🐛🐛🐛family👨👩👧👦 ()) {}
11+
| ^^^^^^^^^^^^^^^^^^^^^
12+
13+
error: lifetimes cannot contain emoji
14+
--> $DIR/emoji-in-lifetime.rs:6:5
15+
|
16+
LL | '🐛: {
17+
| ^^^
18+
19+
error: aborting due to 3 previous errors
20+

0 commit comments

Comments
 (0)