Skip to content

Commit 7b11341

Browse files
committed
Fix quadratic time and space when parsing 'await/'
Pathological code can cause quick-lint-js to consume large amounts of memory and time. The root cause is infinite lookahead (implemented as backtracking), which occurs when parsing code like the following: function f() { await/ ()=>{{{{{{{await/ ()=>{{{{{{{await/ ()=>{{{{{{{await/ ()=>{{{{{{{await/ ()=>{{{{{{{await/ } quick-lint-js's algorithm for determining whether 'await' is supposed to be an identifier or an operator speculatively parses what follows. Because the speculative parse can parse an arbitrary amount of code, and speculative parses can be nested, we end up with quadratic behavior. Cache 'await' guesses to turn the quadratic behavior into linear behavior. This change should improve performance for pathological cases without changing behavior.
1 parent c4291b7 commit 7b11341

File tree

2 files changed

+78
-2
lines changed

2 files changed

+78
-2
lines changed

src/parse.cpp

Lines changed: 43 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -711,6 +711,21 @@ expression* parser::parse_await_expression(token await_token, precedence prec) {
711711
// await / rhs;
712712
case token_type::slash:
713713
case token_type::slash_equal: {
714+
parse_expression_cache_key cache_key = {
715+
.begin = this->peek().begin,
716+
.in_top_level = this->in_top_level_,
717+
.in_async_function = this->in_async_function_,
718+
.in_generator_function = this->in_generator_function_,
719+
.in_loop_statement = this->in_loop_statement_,
720+
.in_switch_statement = this->in_switch_statement_,
721+
.in_class = this->in_class_,
722+
};
723+
auto cache_it =
724+
this->await_slash_is_identifier_divide_cache_.find(cache_key);
725+
if (cache_it != this->await_slash_is_identifier_divide_cache_.end()) {
726+
return cache_it->second;
727+
}
728+
714729
buffering_error_reporter temp_error_reporter(&this->temporary_memory_);
715730
error_reporter* old_error_reporter =
716731
std::exchange(this->error_reporter_, &temp_error_reporter);
@@ -732,13 +747,19 @@ expression* parser::parse_await_expression(token await_token, precedence prec) {
732747
this->lexer_.roll_back_transaction(std::move(transaction));
733748
this->error_reporter_ = old_error_reporter;
734749

750+
bool is_identifier_result;
735751
if (this->in_top_level_) {
736752
bool parsed_slash_as_regexp = parsed_ok;
737-
return !parsed_slash_as_regexp;
753+
is_identifier_result = !parsed_slash_as_regexp;
738754
} else {
739755
bool parsed_slash_as_divide = parsed_ok;
740-
return parsed_slash_as_divide;
756+
is_identifier_result = parsed_slash_as_divide;
741757
}
758+
auto [_cache_it, inserted] =
759+
this->await_slash_is_identifier_divide_cache_.try_emplace(
760+
cache_key, is_identifier_result);
761+
QLJS_ASSERT(inserted);
762+
return is_identifier_result;
742763
}
743764

744765
case token_type::kw_of:
@@ -2275,6 +2296,26 @@ parser::function_guard::~function_guard() noexcept {
22752296
this->parser_->in_loop_statement_ = this->was_in_loop_statement_;
22762297
this->parser_->in_switch_statement_ = this->was_in_switch_statement_;
22772298
}
2299+
2300+
// Equality for cache keys: two keys match only when they refer to the same
// source position (`begin`) and every recorded parser-context flag
// (top-level, async, generator, loop, switch, class) is identical.
bool parser::parse_expression_cache_key::operator==(
2301+
const parser::parse_expression_cache_key& rhs) const noexcept {
2302+
return this->begin == rhs.begin && this->in_top_level == rhs.in_top_level &&
2303+
this->in_async_function == rhs.in_async_function &&
2304+
this->in_generator_function == rhs.in_generator_function &&
2305+
this->in_loop_statement == rhs.in_loop_statement &&
2306+
this->in_switch_statement == rhs.in_switch_statement &&
2307+
this->in_class == rhs.in_class;
2308+
}
2309+
2310+
// Inequality for cache keys; defined as the exact negation of operator==.
bool parser::parse_expression_cache_key::operator!=(
2311+
const parser::parse_expression_cache_key& rhs) const noexcept {
2312+
return !(*this == rhs);
2313+
}
2314+
2315+
// Hash functor for cache keys. Only the `begin` pointer is hashed; the boolean
// context flags are not mixed in, so keys that share a source position hash
// identically regardless of their flags and are distinguished only by the
// equality comparison.
std::size_t parser::parse_expression_cache_key::hash::operator()(
2316+
const parse_expression_cache_key& x) const noexcept {
2317+
return std::hash<const char8*>()(x.begin);
2318+
}
22782319
}
22792320

22802321
// quick-lint-js finds bugs in JavaScript programs.

src/quick-lint-js/parse.h

Lines changed: 35 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -21,6 +21,7 @@
2121
#include <quick-lint-js/parse-visitor.h>
2222
#include <quick-lint-js/token.h>
2323
#include <quick-lint-js/warning.h>
24+
#include <unordered_map>
2425
#include <utility>
2526

2627
#if QLJS_HAVE_SETJMP
@@ -3712,6 +3713,23 @@ class parser {
37123713
int old_depth_;
37133714
};
37143715

3716+
// Key for caching the outcome of a speculative parse: a source position plus a
// snapshot of the parser-context flags that were in effect at that position.
// Used as the key type of await_slash_is_identifier_divide_cache_.
struct parse_expression_cache_key {
3717+
// Source position the cached decision applies to (filled from
// peek().begin when the key is built).
const char8 *begin;
3718+
// Snapshots of the parser's corresponding in_*_ state flags at `begin`.
bool in_top_level;
3719+
bool in_async_function;
3720+
bool in_generator_function;
3721+
bool in_loop_statement;
3722+
bool in_switch_statement;
3723+
bool in_class;
3724+
3725+
// Member-wise comparison over `begin` and all flags.
bool operator==(const parse_expression_cache_key &rhs) const noexcept;
3726+
bool operator!=(const parse_expression_cache_key &rhs) const noexcept;
3727+
3728+
// Hash functor so the key can be used with std::unordered_map.
struct hash {
3729+
std::size_t operator()(const parse_expression_cache_key &) const noexcept;
3730+
};
3731+
};
3732+
37153733
quick_lint_js::lexer lexer_;
37163734
error_reporter *error_reporter_;
37173735
quick_lint_js::expression_arena expressions_;
@@ -3730,6 +3748,23 @@ class parser {
37303748
bool in_switch_statement_ = false;
37313749
bool in_class_ = false;
37323750

3751+
// Cache of whether 'await' is an identifier or an operator. This cache is
3752+
// used to avoid quadratic run-time in code like the following:
3753+
//
3754+
// await / await / await / await / await
3755+
//
3756+
// (In `await/await`, `await` is an identifier. But in `await/await/`, the
3757+
// first `await` is an operator.)
3758+
//
3759+
// The value of each entry indicates the conclusion:
3760+
// * true means 'await' looks like an identifier, thus '/' is the division
3761+
// operator.
3762+
// * false means 'await' looks like an operator, thus '/' begins a regular
3763+
// expression literal.
3764+
std::unordered_map<parse_expression_cache_key, bool,
3765+
parse_expression_cache_key::hash>
3766+
await_slash_is_identifier_divide_cache_;
3767+
37333768
#if QLJS_HAVE_SETJMP
37343769
bool have_fatal_parse_error_jmp_buf_ = false;
37353770
std::jmp_buf fatal_parse_error_jmp_buf_;

0 commit comments

Comments
 (0)