Skip to content

Commit 0a463a8

Browse files
committed
fix(parser): fix bad perf and wrong precedece of await
We sometimes parse await expresesions twice if we suspect that the 'await' should have been 'async' instead. This can lead to a lot of backtracking, causing very bad performance on inputs like the following: await a => await a => await a => await a => await a => await a => await a => await a => await a => await a => await a => await a => await a => await a => await a => await a => await a => a Fix this performance issue by rewriting the logic to avoid backtracking altogether. Instead, interweave the logic in parse_async_expression. As a side effect, this change fixes a bug where binary operators after the 'await' were incorrectly consumed as part of the await expression. Benchmark Time CPU Time Old Time New CPU Old CPU New ---------------------------------------------------------------------------------------------------------------------------------------------- benchmark_parse/pathological_await_arrow -1.0000 -1.0000 213495925 1672 213458684 1672 benchmark_parse/pathological_await_arrow -1.0000 -1.0000 213580205 1672 213542936 1672 benchmark_parse/pathological_await_arrow -1.0000 -1.0000 213228540 1680 213188854 1679 benchmark_parse/pathological_await_arrow -1.0000 -1.0000 212768030 1690 212733692 1690 benchmark_parse/pathological_await_arrow -1.0000 -1.0000 213213714 1689 213173590 1689 benchmark_parse/pathological_await_arrow -1.0000 -1.0000 213519479 1696 213483344 1696 benchmark_parse/pathological_await_arrow -1.0000 -1.0000 213759724 1698 213724009 1698 benchmark_parse/pathological_await_arrow -1.0000 -1.0000 214259949 1695 214218748 1695 benchmark_parse/pathological_await_arrow -1.0000 -1.0000 213101099 1690 213062912 1690 benchmark_parse/pathological_await_arrow -1.0000 -1.0000 213137875 1717 213100767 1717 benchmark_parse/pathological_await_arrow_pvalue 0.0002 0.0002 U Test, Repetitions: 10 vs 10 benchmark_parse/pathological_await_arrow_mean -1.0000 -1.0000 213406454 1690 213368753 1690 benchmark_parse/pathological_await_arrow_median -1.0000 -1.0000 213362232 1690 213323769 1690 benchmark_parse/pathological_await_arrow_stddev -1.0000 -1.0000 414459 13 413571 13 benchmark_parse/pathological_await_arrow_cv +3.0919 +3.0997 0 0 0 0 OVERALL_GEOMEAN -1.0000 -1.0000 0 0 0 0
1 parent 799588d commit 0a463a8

File tree

4 files changed

+109
-66
lines changed

4 files changed

+109
-66
lines changed

src/quick-lint-js/fe/parse-expression.cpp

Lines changed: 87 additions & 63 deletions
Original file line numberDiff line numberDiff line change
@@ -714,14 +714,16 @@ expression* parser::parse_async_expression(parse_visitor_base& v,
714714
return this->parse_expression_remainder(v, ast, prec);
715715
}
716716

717-
expression* parser::parse_async_expression_only(parse_visitor_base& v,
718-
const token& async_token,
719-
bool allow_in_operator) {
720-
const char8* async_begin = async_token.begin;
717+
expression* parser::parse_async_expression_only(
718+
parse_visitor_base& v, const token& async_or_await_token,
719+
bool allow_in_operator) {
720+
bool is_async = async_or_await_token.type == token_type::kw_async;
721+
bool is_await = async_or_await_token.type == token_type::kw_await;
722+
QLJS_ASSERT(is_async || is_await);
723+
const char8* async_begin = async_or_await_token.begin;
721724

722725
auto parse_arrow_function_arrow_and_body =
723-
[this, allow_in_operator, async_begin, &v](
724-
auto&& parameters, buffering_visitor* parameter_visits,
726+
[&](auto&& parameters, buffering_visitor* parameter_visits,
725727
buffering_visitor* return_type_visits) {
726728
if (this->peek().type == token_type::equal_greater) {
727729
this->skip();
@@ -731,6 +733,11 @@ expression* parser::parse_async_expression_only(parse_visitor_base& v,
731733
.where = this->peek().span(),
732734
});
733735
}
736+
if (is_await) {
737+
this->diag_reporter_->report(diag_await_followed_by_arrow_function{
738+
.await_operator = async_or_await_token.span(),
739+
});
740+
}
734741

735742
v.visit_enter_function_scope();
736743
if (parameter_visits) {
@@ -746,11 +753,14 @@ expression* parser::parse_async_expression_only(parse_visitor_base& v,
746753
};
747754

748755
switch (this->peek().type) {
749-
// async () => {} // Arrow function.
750-
// async() // Function call.
756+
// async () => {} // Arrow function.
757+
// async() // Function call.
758+
// await (promise) // Unary operator.
759+
// await (param) => {} // Arrow function (invalid).
751760
case token_type::left_paren: {
752761
bool newline_after_async = this->peek().has_leading_newline;
753762

763+
lexer_transaction transaction = this->lexer_.begin_transaction();
754764
source_code_span left_paren_span = this->peek().span();
755765
this->skip();
756766
expression_arena::vector<expression*> parameters =
@@ -771,10 +781,11 @@ expression* parser::parse_async_expression_only(parse_visitor_base& v,
771781
!this->peek().has_leading_newline &&
772782
this->peek().type == token_type::left_curly;
773783
if (is_arrow_function || is_arrow_function_without_arrow) {
784+
this->lexer_.commit_transaction(std::move(transaction));
774785
if (newline_after_async) {
775786
this->diag_reporter_->report(
776787
diag_newline_not_allowed_between_async_and_parameter_list{
777-
.async = async_token.span(),
788+
.async = async_or_await_token.span(),
778789
.arrow = this->peek().span(),
779790
});
780791
}
@@ -793,15 +804,16 @@ expression* parser::parse_async_expression_only(parse_visitor_base& v,
793804
return parse_arrow_function_arrow_and_body(std::move(parameters),
794805
/*parameter_visits=*/nullptr,
795806
&return_type_visits);
796-
} else {
807+
} else if (is_async) {
797808
// async as an identifier (variable reference)
798809
// Function call: async(arg)
810+
this->lexer_.commit_transaction(std::move(transaction));
799811
// TODO(strager): Reduce copying of the arguments.
800812
expression_arena::vector<expression*> call_children(
801813
"parse_expression async call children",
802814
this->expressions_.allocator());
803815
call_children.emplace_back(this->make_expression<expression::variable>(
804-
async_token.identifier_name(), async_token.type));
816+
async_or_await_token.identifier_name(), async_or_await_token.type));
805817
for (std::size_t i = 0; i < parameters.size(); ++i) {
806818
if (parameters.data()[i]->kind() != expression_kind::_invalid) {
807819
call_children.emplace_back(parameters.data()[i]);
@@ -813,6 +825,10 @@ expression* parser::parse_async_expression_only(parse_visitor_base& v,
813825
/*left_paren_span=*/left_paren_span,
814826
/*span_end=*/right_paren_span.end());
815827
return call_ast;
828+
} else if (is_await) {
829+
// await is an operator.
830+
this->lexer_.roll_back_transaction(std::move(transaction));
831+
goto await_operator;
816832
}
817833

818834
QLJS_UNREACHABLE();
@@ -822,6 +838,7 @@ expression* parser::parse_async_expression_only(parse_visitor_base& v,
822838
// async <T>() => {} // Arrow function. TypeScript only.
823839
// async<T>() // Function call. TypeScript only.
824840
// async<T> // Variable with generic arguments. TypeScript only.
841+
// TODO(#832): Parse generic arrow with 'await' keyword.
825842
case token_type::less:
826843
if (this->options_.typescript) {
827844
parser_transaction transaction = this->begin_transaction();
@@ -882,20 +899,26 @@ expression* parser::parse_async_expression_only(parse_visitor_base& v,
882899
// strict mode.
883900
[[fallthrough]];
884901
// async parameter => expression-or-block // Arrow function.
902+
// await parameter => expression-or-block // Arrow function (invalid).
903+
// await promise // Unary operator.
885904
QLJS_CASE_CONTEXTUAL_KEYWORD:
886905
case token_type::identifier:
887906
case token_type::kw_await:
888907
case token_type::kw_yield: {
889-
if (this->peek().has_leading_newline) {
908+
if (is_async && this->peek().has_leading_newline) {
890909
goto variable_reference;
891910
}
892911

893-
if (this->peek().type == token_type::kw_await) {
912+
bool parameter_is_await = this->peek().type == token_type::kw_await;
913+
if (parameter_is_await && is_async) {
894914
// async await => {} // Invalid
895915
this->diag_reporter_->report(diag_cannot_declare_await_in_async_function{
896916
.name = this->peek().identifier_name(),
897917
});
898918
}
919+
920+
lexer_transaction transaction = this->lexer_.begin_transaction();
921+
899922
const char8* parameter_begin = this->peek().begin;
900923
std::array<expression*, 1> parameters = {
901924
this->make_expression<expression::variable>(
@@ -919,25 +942,69 @@ expression* parser::parse_async_expression_only(parse_visitor_base& v,
919942
});
920943
}
921944

945+
if (this->peek().type != token_type::equal_greater && is_await) {
946+
this->lexer_.roll_back_transaction(std::move(transaction));
947+
if (parameter_is_await) {
948+
this->diag_reporter_->report(diag_redundant_await{
949+
.await_operator = async_or_await_token.span(),
950+
});
951+
}
952+
goto await_operator;
953+
}
954+
this->lexer_.commit_transaction(std::move(transaction));
922955
return parse_arrow_function_arrow_and_body(std::move(parameters),
923956
/*parameter_visits=*/nullptr,
924957
/*return_type_visits=*/nullptr);
925958
}
926959

927960
// async function f(parameters) { statements; }
961+
// await function f() {}
928962
case token_type::kw_function: {
963+
if (is_await) {
964+
goto await_operator;
965+
}
929966
expression* function = this->parse_function_expression(
930967
v, function_attributes::async, async_begin);
931968
return function;
932969
}
933970

934971
// async // Identifier (variable reference).
972+
// await p
973+
// await []
974+
await_operator:
935975
variable_reference:
936-
default: {
937-
expression* ast = this->make_expression<expression::variable>(
938-
async_token.identifier_name(), async_token.type);
939-
return ast;
940-
}
976+
default:
977+
if (is_async) {
978+
expression* ast = this->make_expression<expression::variable>(
979+
async_or_await_token.identifier_name(), async_or_await_token.type);
980+
return ast;
981+
} else if (is_await) {
982+
if (!(this->in_async_function_ || this->in_top_level_)) {
983+
this->diag_reporter_->report(diag_await_operator_outside_async{
984+
.await_operator = async_or_await_token.span(),
985+
});
986+
}
987+
988+
expression* child =
989+
this->parse_expression(v, precedence{
990+
.binary_operators = true,
991+
.math_or_logical_or_assignment = false,
992+
.equals_assignment = false,
993+
.commas = false,
994+
.in_operator = false,
995+
.colon_type_annotation = false,
996+
.trailing_curly_is_arrow_body = false,
997+
.conditional_operator = false,
998+
});
999+
if (child->kind() == expression_kind::_missing) {
1000+
this->diag_reporter_->report(diag_missing_operand_for_operator{
1001+
.where = async_or_await_token.span(),
1002+
});
1003+
}
1004+
return this->make_expression<expression::await>(
1005+
child, async_or_await_token.span());
1006+
}
1007+
QLJS_UNREACHABLE();
9411008
}
9421009

9431010
QLJS_UNREACHABLE();
@@ -1070,51 +1137,8 @@ expression* parser::parse_await_expression(parse_visitor_base& v,
10701137
return this->make_expression<expression::variable>(
10711138
await_token.identifier_name(), await_token.type);
10721139
} else {
1073-
source_code_span operator_span = await_token.span();
1074-
1075-
expression* result;
1076-
this->try_parse(
1077-
[&] {
1078-
buffering_visitor& temp_visits =
1079-
this->buffering_visitor_stack_.emplace(
1080-
boost::container::pmr::new_delete_resource());
1081-
1082-
expression* child = this->parse_expression(temp_visits, prec);
1083-
1084-
if (child->kind() == expression_kind::_missing) {
1085-
this->diag_reporter_->report(diag_missing_operand_for_operator{
1086-
.where = operator_span,
1087-
});
1088-
} else if (child->kind() == expression_kind::arrow_function &&
1089-
child->attributes() != function_attributes::async) {
1090-
// await (param) => { } // Invalid.
1091-
return false;
1092-
}
1093-
1094-
if (!(this->in_async_function_ || this->in_top_level_)) {
1095-
this->diag_reporter_->report(diag_await_operator_outside_async{
1096-
.await_operator = operator_span,
1097-
});
1098-
} else if (child->kind() == expression_kind::await) {
1099-
// await await
1100-
this->diag_reporter_->report(diag_redundant_await{
1101-
.await_operator = operator_span,
1102-
});
1103-
}
1104-
1105-
result =
1106-
this->make_expression<expression::await>(child, operator_span);
1107-
temp_visits.move_into(v);
1108-
return true;
1109-
},
1110-
[&] {
1111-
this->diag_reporter_->report(diag_await_followed_by_arrow_function{
1112-
.await_operator = operator_span,
1113-
});
1114-
// Re-parse as if the user wrote 'async' instead of 'await'.
1115-
result = this->parse_async_expression(v, await_token, prec);
1116-
});
1117-
return result;
1140+
return this->parse_async_expression_only(
1141+
v, await_token, /*allow_in_operator=*/prec.in_operator);
11181142
}
11191143
}
11201144

src/quick-lint-js/fe/parse.h

Lines changed: 5 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -432,8 +432,12 @@ class parser {
432432
expression *parse_primary_expression(parse_visitor_base &, precedence);
433433
expression *parse_async_expression(parse_visitor_base &,
434434
const token &async_token, precedence);
435+
// Parses either:
436+
// * an expression starting with 'async', or
437+
// * an expression starting with 'await' where we determined that it is either
438+
// a unary operator or a mistyped 'async'.
435439
expression *parse_async_expression_only(parse_visitor_base &,
436-
const token &async_token,
440+
const token &async_or_await_token,
437441
bool allow_in_operator);
438442
expression *parse_await_expression(parse_visitor_base &,
439443
const token &await_token, precedence prec);

test/test-parse-expression.cpp

Lines changed: 6 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -967,6 +967,9 @@ TEST_F(test_parse_expression, await_followed_by_arrow_function) {
967967
p.code, diag_await_followed_by_arrow_function, //
968968
await_operator, 0, u8"await")));
969969
}
970+
971+
// TODO(strager): Test an arrow function with TypeScript a return type
972+
// annotation.
970973
};
971974

972975
{
@@ -1031,8 +1034,10 @@ TEST_F(test_parse_expression,
10311034
{u8"await/re/g"_sv, "binary(var await, var re, var g)", "await(literal)"},
10321035
{u8"await+x"_sv, "binary(var await, var x)", "await(unary(var x))"},
10331036
{u8"await-x"_sv, "binary(var await, var x)", "await(unary(var x))"},
1034-
{u8"await<x>y</x>/g"_sv, "binary(var await, var x, var y, literal)", "await(binary(jsxelement(x), var g))", jsx},
1037+
{u8"await<x>y</x>/g"_sv, "binary(var await, var x, var y, literal)", "binary(await(jsxelement(x)), var g)", jsx},
10351038
{u8"await(x)"_sv, "call(var await, var x)", "await(paren(var x))"},
1039+
{u8"await(x,)"_sv, "call(var await, var x)", "await(paren(trailingcomma(var x)))"},
1040+
{u8"await(x, y)"_sv, "call(var await, var x, var y)", "await(paren(binary(var x, var y)))"},
10361041
{u8"await[x]"_sv, "index(var await, var x)", "await(array(var x))"},
10371042
{u8"await++\nx"_sv, "rwunarysuffix(var await)", "await(rwunary(var x))"},
10381043
{u8"await--\nx"_sv, "rwunarysuffix(var await)", "await(rwunary(var x))"},

test/test-parse.cpp

Lines changed: 11 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -826,7 +826,6 @@ TEST_F(test_overflow, parser_depth_limit_exceeded) {
826826
repeated_str(u8"while(true) ", u8"10", u8"",
827827
parser::stack_limit + 1),
828828
repeated_str(u8"for(;;) ", u8"10", u8"", parser::stack_limit + 1),
829-
repeated_str(u8"await ", u8"10", u8"", parser::stack_limit + 1),
830829
repeated_str(u8"if(true) ", u8"10", u8"", parser::stack_limit + 1),
831830
repeated_str(u8"function f() { ", u8"", u8"}",
832831
parser::stack_limit + 1),
@@ -852,6 +851,17 @@ TEST_F(test_overflow, parser_depth_limit_exceeded) {
852851
EXPECT_THAT(v.errors, ElementsAre(DIAG_TYPE(diag_depth_limit_exceeded)));
853852
}
854853

854+
{
855+
padded_string code(
856+
repeated_str(u8"await ", u8"10", u8"", parser::stack_limit + 1));
857+
spy_visitor v;
858+
parser p(&code, &v);
859+
bool ok = p.parse_and_visit_module_catching_fatal_parse_errors(v);
860+
EXPECT_FALSE(ok);
861+
EXPECT_THAT(v.errors,
862+
::testing::Contains(DIAG_TYPE(diag_depth_limit_exceeded)));
863+
}
864+
855865
{
856866
padded_string code(
857867
u8"(" + repeated_str(u8"{x:", u8"", u8"}", parser::stack_limit + 1) +

0 commit comments

Comments
 (0)