Skip to content

Commit d09997c

Browse files
committed
feat(fe): error on ';' confusables; treat as ';'
GREEK QUESTION MARK (U+037E) looks like a semicolon (U+003B):

    ; GREEK QUESTION MARK (U+037E)
    ; SEMICOLON (U+003B)

Report a more helpful diagnostic when a Greek question mark is encountered in code. Also, treat the symbol as a proper semicolon during parsing.
1 parent a429162 commit d09997c

15 files changed

+128
-15
lines changed

docs/CHANGELOG.md

Lines changed: 2 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -13,6 +13,8 @@ Semantic Versioning.
1313
* VS Code: You can now make quick-lint-js messages fun and insulting with the
1414
`quick-lint-js.snarky` setting (disabled by default). (Implemented by
1515
[vegerot][].)
16+
* Using Greek question mark (;, U+037E) instead of a semicolon (;, U+003B) now
17+
reports [E0457][] ("this is a Greek Question Mark, not a semicolon (';')").
1618
* TypeScript: Decorators on abstract classes are now parsed. ([#1194][])
1719

1820
### Fixed

po/messages.pot

Lines changed: 4 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -717,6 +717,10 @@ msgstr ""
717717
msgid "unknown JSX mode; try \"none\" or \"react\""
718718
msgstr ""
719719

720+
#: src/quick-lint-js/diag/diagnostic-metadata-generated.cpp
721+
msgid "this is a {1}, not a {2} ('{3}')"
722+
msgstr ""
723+
720724
#: src/quick-lint-js/diag/diagnostic-metadata-generated.cpp
721725
msgid "depth limit exceeded"
722726
msgstr ""

src/quick-lint-js/diag/diagnostic-formatter.h

Lines changed: 2 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -139,6 +139,8 @@ inline void Diagnostic_Formatter<Derived>::format_message(
139139
expanded_parameter = this->expand_argument_singular(args, diagnostic, 1);
140140
} else if (curly_content == u8"2"_sv) {
141141
expanded_parameter = this->expand_argument(args, diagnostic, 2);
142+
} else if (curly_content == u8"3"_sv) {
143+
expanded_parameter = this->expand_argument(args, diagnostic, 3);
142144
} else {
143145
QLJS_ASSERT(false && "invalid message format: unrecognized placeholder");
144146
QLJS_UNREACHABLE();

src/quick-lint-js/diag/diagnostic-metadata-generated.cpp

Lines changed: 17 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -1392,6 +1392,23 @@ const QLJS_CONSTINIT Diagnostic_Info all_diagnostic_infos[] = {
13921392
},
13931393
},
13941394

1395+
// Diag_Confusable_Symbol
1396+
{
1397+
.code = 457,
1398+
.severity = Diagnostic_Severity::error,
1399+
.message_formats = {
1400+
QLJS_TRANSLATABLE("this is a {1}, not a {2} ('{3}')"),
1401+
},
1402+
.message_args = {
1403+
{
1404+
Diagnostic_Message_Arg_Info(offsetof(Diag_Confusable_Symbol, confusable), Diagnostic_Arg_Type::source_code_span),
1405+
Diagnostic_Message_Arg_Info(offsetof(Diag_Confusable_Symbol, confusable_name), Diagnostic_Arg_Type::string8_view),
1406+
Diagnostic_Message_Arg_Info(offsetof(Diag_Confusable_Symbol, symbol_name), Diagnostic_Arg_Type::string8_view),
1407+
Diagnostic_Message_Arg_Info(offsetof(Diag_Confusable_Symbol, symbol), Diagnostic_Arg_Type::char8),
1408+
},
1409+
},
1410+
},
1411+
13951412
// Diag_Depth_Limit_Exceeded
13961413
{
13971414
.code = 203,

src/quick-lint-js/diag/diagnostic-metadata-generated.h

Lines changed: 2 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -101,6 +101,7 @@ namespace quick_lint_js {
101101
QLJS_DIAG_TYPE_NAME(Diag_Config_Globals_Type_Mismatch) \
102102
QLJS_DIAG_TYPE_NAME(Diag_Config_JSX_Mode_Type_Mismatch) \
103103
QLJS_DIAG_TYPE_NAME(Diag_Config_JSX_Mode_Unrecognized) \
104+
QLJS_DIAG_TYPE_NAME(Diag_Confusable_Symbol) \
104105
QLJS_DIAG_TYPE_NAME(Diag_Depth_Limit_Exceeded) \
105106
QLJS_DIAG_TYPE_NAME(Diag_Dot_Not_Allowed_After_Generic_Arguments_In_Type) \
106107
QLJS_DIAG_TYPE_NAME(Diag_Dot_Dot_Is_Not_An_Operator) \
@@ -475,7 +476,7 @@ namespace quick_lint_js {
475476
/* END */
476477
// clang-format on
477478

478-
inline constexpr int Diag_Type_Count = 461;
479+
inline constexpr int Diag_Type_Count = 462;
479480

480481
extern const Diagnostic_Info all_diagnostic_infos[Diag_Type_Count];
481482
}

src/quick-lint-js/diag/diagnostic-types-2.h

Lines changed: 10 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -746,6 +746,16 @@ struct Diag_Config_JSX_Mode_Unrecognized {
746746
Source_Code_Span value;
747747
};
748748

749+
// Diagnostic E0457 (severity: error), reported by the lexer when it finds a
// character that is listed as a look-alike of an ordinary symbol, e.g.
// U+037E GREEK QUESTION MARK used where ';' was meant.
//
// Members:
//   confusable      - source span of the token produced for the look-alike
//                     character.
//   confusable_name - human-readable name of the look-alike character.
//   symbol          - the symbol the look-alike is treated as.
//   symbol_name     - human-readable name of that symbol.
//
// NOTE: the ARG(...) list is ordered confusable, confusable_name, symbol_name,
// symbol, which differs from the member declaration order below. The message
// text substitutes {1} = confusable_name, {2} = symbol_name, {3} = symbol; the
// first ARG (confusable) is not referenced by any placeholder in the text.
struct Diag_Confusable_Symbol {
750+
[[qljs::diag("E0457", Diagnostic_Severity::error)]] //
751+
[[qljs::message("this is a {1}, not a {2} ('{3}')", ARG(confusable),
752+
ARG(confusable_name), ARG(symbol_name), ARG(symbol))]] //
753+
Source_Code_Span confusable;
754+
String8_View confusable_name;
755+
Char8 symbol;
756+
String8_View symbol_name;
757+
};
758+
749759
struct Diag_Depth_Limit_Exceeded {
750760
[[qljs::diag("E0203", Diagnostic_Severity::error)]] //
751761
[[qljs::message("depth limit exceeded", ARG(token))]] //

src/quick-lint-js/diag/diagnostic.h

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -80,7 +80,7 @@ struct Diagnostic_Message_Arg_Info {
8080
Diagnostic_Arg_Type type : (8 - offset_bits) QLJS_WORK_AROUND_GCC_BUG_105191;
8181
};
8282

83-
using Diagnostic_Message_Args = std::array<Diagnostic_Message_Arg_Info, 3>;
83+
using Diagnostic_Message_Args = std::array<Diagnostic_Message_Arg_Info, 4>;
8484

8585
struct Diagnostic_Info {
8686
std::array<char, 5> code_string() const;

src/quick-lint-js/fe/lex.cpp

Lines changed: 51 additions & 8 deletions
Original file line numberDiff line numberDiff line change
@@ -91,6 +91,20 @@ constexpr char32_t left_double_quote = U'\u201c';
9191
constexpr char32_t right_single_quote = U'\u2019';
9292
constexpr char32_t right_double_quote = U'\u201d';
9393

94+
// One row of the confusable-symbol table: a code point that is handled as if
// it were a different, ordinary symbol.
//
//   confusable        - code point compared against the decoded input
//                       character.
//   confusable_name   - display name of the look-alike, passed to
//                       Diag_Confusable_Symbol::confusable_name.
//   symbol            - the symbol it is mistaken for, passed to
//                       Diag_Confusable_Symbol::symbol.
//   symbol_name       - display name of that symbol, passed to
//                       Diag_Confusable_Symbol::symbol_name.
//   symbol_token_type - token type the lexer assigns when the look-alike is
//                       encountered.
//
// NOTE(review): both name buffers hold at most 20 Char8 including the null
// terminator. Adding an entry with a longer name requires growing these arrays.
struct Confusable_Symbol {
95+
char32_t confusable;
96+
Char8 confusable_name[20];
97+
Char8 symbol;
98+
Char8 symbol_name[20];
99+
Token_Type symbol_token_type;
100+
};
101+
102+
// Table of look-alike characters recognised by the lexer. It is scanned
// linearly by Lexer::parse_non_ascii and Lexer::is_confusable_symbol_character.
//
// Currently holds a single entry: U+037E GREEK QUESTION MARK, treated as ';'
// (Token_Type::semicolon). "Greek Question Mark" is 19 characters, so with its
// null terminator it exactly fills Confusable_Symbol::confusable_name[20].
//
// NOTE(review): the array is not declared const/constexpr, although nothing
// visible in this change writes to it - consider making it constexpr.
Confusable_Symbol confusable_symbols[] = {
103+
{0x037e, u8"Greek Question Mark", u8';', u8"semicolon",
104+
Token_Type::semicolon},
105+
// TODO(strager): Add more.
106+
};
107+
94108
bool look_up_in_unicode_table(const std::uint8_t* table, std::size_t table_size,
95109
char32_t code_point) {
96110
constexpr int bits_per_byte = 8;
@@ -1817,7 +1831,9 @@ Lexer::Parsed_Identifier Lexer::parse_identifier_slow(
18171831
: this->is_identifier_character(code_point, kind);
18181832
if (!is_legal_character) {
18191833
if (this->is_ascii_character(code_point) ||
1820-
this->is_non_ascii_whitespace_character(code_point)) {
1834+
this->is_non_ascii_whitespace_character(code_point) ||
1835+
// Confusable symbols are handled by parse_non_ascii.
1836+
this->is_confusable_symbol_character(code_point)) {
18211837
break;
18221838
} else {
18231839
this->diag_reporter_->report(Diag_Character_Disallowed_In_Identifiers{
@@ -1850,21 +1866,39 @@ QLJS_WARNING_POP
18501866
// Lexes one token starting at this->input_ by decoding a UTF-8 character there
// (presumably only called when that character is non-ASCII - confirm against
// the caller).
//
// This is a diff hunk: lines under a "NNNN-" gutter are the removed `else`
// branch, lines under a "NNNN+" gutter are its replacement. After the change
// the function has three outcomes, each of which sets this->input_,
// last_token_.end and last_token_.type:
//   1. The code point is a left/right single/double smart quote: the token is
//      lexed by parse_smart_quote_string_literal and typed Token_Type::string.
//   2. The code point matches an entry of confusable_symbols: character.size
//      bytes are consumed, the token gets the entry's symbol_token_type, and
//      Diag_Confusable_Symbol is reported with the token's span (computed after
//      last_token_.end is updated).
//   3. Otherwise: the input is lexed by parse_identifier_slow as a JavaScript
//      identifier and typed Token_Type::identifier.
//
// last_token_.begin is not assigned here; presumably the caller sets it.
// The decode result's validity is not checked (see the FIXME below).
void Lexer::parse_non_ascii() {
18511867
Decode_UTF8_Result character = decode_utf_8(Padded_String_View(
18521868
this->input_, this->original_input_.null_terminator()));
1869+
// FIXME(strager): We probably need to check character.ok.
1870+
18531871
if (character.code_point == left_single_quote ||
18541872
character.code_point == right_single_quote ||
18551873
character.code_point == left_double_quote ||
18561874
character.code_point == right_double_quote) {
18571875
this->input_ = this->parse_smart_quote_string_literal(character);
18581876
this->last_token_.type = Token_Type::string;
18591877
this->last_token_.end = this->input_;
1860-
} else {
1861-
Parsed_Identifier ident = this->parse_identifier_slow(
1862-
this->input_, this->input_, Identifier_Kind::javascript);
1863-
this->input_ = ident.after;
1864-
this->last_token_.normalized_identifier = ident.normalized;
1865-
this->last_token_.end = ident.after;
1866-
this->last_token_.type = Token_Type::identifier;
1878+
return;
18671879
}
1880+
1881+
for (const Confusable_Symbol& confusable : confusable_symbols) {
1882+
if (character.code_point == confusable.confusable) {
1883+
this->input_ += character.size;
1884+
this->last_token_.end = this->input_;
1885+
this->last_token_.type = confusable.symbol_token_type;
1886+
this->diag_reporter_->report(Diag_Confusable_Symbol{
1887+
.confusable = this->last_token_.span(),
1888+
.confusable_name = confusable.confusable_name,
1889+
.symbol = confusable.symbol,
1890+
.symbol_name = confusable.symbol_name,
1891+
});
1892+
return;
1893+
}
1894+
}
1895+
1896+
Parsed_Identifier ident = this->parse_identifier_slow(
1897+
this->input_, this->input_, Identifier_Kind::javascript);
1898+
this->input_ = ident.after;
1899+
this->last_token_.normalized_identifier = ident.normalized;
1900+
this->last_token_.end = ident.after;
1901+
this->last_token_.type = Token_Type::identifier;
18681902
}
18691903

18701904
QLJS_WARNING_PUSH
@@ -2319,6 +2353,15 @@ bool Lexer::is_ascii_character(char32_t code_point) {
23192353
return code_point < 0x80;
23202354
}
23212355

2356+
// Returns true if code_point equals the `confusable` field of any entry in the
// confusable_symbols table, and false otherwise.
//
// The lookup is a linear scan of the table. parse_identifier_slow uses this to
// stop at such a character instead of reporting
// Diag_Character_Disallowed_In_Identifiers, leaving it for parse_non_ascii.
bool Lexer::is_confusable_symbol_character(char32_t code_point) {
2357+
for (const Confusable_Symbol& confusable : confusable_symbols) {
2358+
if (code_point == confusable.confusable) {
2359+
return true;
2360+
}
2361+
}
2362+
return false;
2363+
}
2364+
23222365
int Lexer::newline_character_size(const Char8* input) {
23232366
if (input[0] == u8'\n' || input[0] == u8'\r') {
23242367
return 1;

src/quick-lint-js/fe/lex.h

Lines changed: 2 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -327,6 +327,8 @@ class Lexer {
327327
static bool is_ascii_character(Char8 code_unit);
328328
static bool is_ascii_character(char32_t code_point);
329329

330+
static bool is_confusable_symbol_character(char32_t code_point);
331+
330332
static int newline_character_size(const Char8*);
331333
static bool is_newline_character(char32_t code_point);
332334

src/quick-lint-js/i18n/translation-table-generated.cpp

Lines changed: 2 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -532,6 +532,7 @@ const Translation_Table translation_data = {
532532
{46, 25, 63, 57, 39, 51}, //
533533
{0, 0, 0, 0, 0, 52}, //
534534
{0, 0, 0, 0, 0, 27}, //
535+
{0, 0, 0, 0, 0, 33}, //
535536
{0, 0, 0, 61, 0, 61}, //
536537
{50, 25, 0, 70, 0, 78}, //
537538
{33, 21, 74, 25, 44, 21}, //
@@ -2402,6 +2403,7 @@ const Translation_Table translation_data = {
24022403
u8"switch statement is missing '{1}' around condition\0"
24032404
u8"switch statement needs parentheses around condition\0"
24042405
u8"this case will run instead\0"
2406+
u8"this is a {1}, not a {2} ('{3}')\0"
24052407
u8"this required parameter appears after the optional parameter\0"
24062408
u8"this tuple type is a named tuple type because at least one element has a name\0"
24072409
u8"this {0} looks fishy\0"

0 commit comments

Comments
 (0)