diff --git a/src/tokenize.jl b/src/tokenize.jl index 739a24c6..cbe9e271 100644 --- a/src/tokenize.jl +++ b/src/tokenize.jl @@ -225,7 +225,7 @@ end @inline ishex(c::Char) = isdigit(c) || ('a' <= c <= 'f') || ('A' <= c <= 'F') @inline isbinary(c::Char) = c == '0' || c == '1' -@inline isoctal(c::Char) = '0' ≤ c ≤ '7' +@inline isoctal(c::Char) = '0' ≤ c ≤ '7' @inline iswhitespace(c::Char) = (isvalid(c) && Base.isspace(c)) || c === '\ufeff' struct StringState @@ -458,82 +458,82 @@ function next_token(l::Lexer, start = true) end function _next_token(l::Lexer, c) - if c == EOF_CHAR - return emit(l, K"EndMarker") - elseif iswhitespace(c) - return lex_whitespace(l, c) - elseif c == '[' + if c == '[' return emit(l, K"[") elseif c == ']' return emit(l, K"]") - elseif c == '{' - return emit(l, K"{") - elseif c == ';' - return emit(l, K";") - elseif c == '}' - return emit(l, K"}") elseif c == '(' return emit(l, K"(") elseif c == ')' return emit(l, K")") elseif c == ',' return emit(l, K",") - elseif c == '*' - return lex_star(l); - elseif c == '^' - return lex_circumflex(l); - elseif c == '@' - return emit(l, K"@") + elseif c == ';' + return emit(l, K";") elseif c == '?' return emit(l, K"?") - elseif c == '$' - return lex_dollar(l); - elseif c == '⊻' - return lex_xor(l); + elseif c == '@' + return emit(l, K"@") + elseif c == '{' + return emit(l, K"{") + elseif c == '}' + return emit(l, K"}") elseif c == '~' - return emit(l, K"~") - elseif c == '#' - return lex_comment(l) + return emit(l, K"~") + elseif iswhitespace(c) + return lex_whitespace(l, c) + elseif is_identifier_start_char(c) + return lex_identifier(l, c) + elseif isdigit(c) + return lex_digit(l, K"Integer") + elseif c == '"' + return lex_quote(l) + elseif c == '+' + return lex_plus(l) + elseif c == '-' + return lex_minus(l) + elseif c == '−' # \minus '−' treated as hyphen '-' + return emit(l, accept(l, '=') ? K"-=" : K"-") elseif c == '=' return lex_equal(l) + elseif c == '#' + return lex_comment(l) + elseif c == '*' + return lex_star(l) + elseif c == '|' + return lex_bar(l) + elseif c == '&' + return lex_amper(l) elseif c == '!' return lex_exclaim(l) elseif c == '>' return lex_greater(l) elseif c == '<' return lex_less(l) + elseif c == '.' + return lex_dot(l) elseif c == ':' return lex_colon(l) - elseif c == '|' - return lex_bar(l) - elseif c == '&' - return lex_amper(l) elseif c == '\'' return lex_prime(l) - elseif c == '÷' - return lex_division(l) - elseif c == '"' - return lex_quote(l); + elseif c == '\\' + return lex_backslash(l) elseif c == '%' - return lex_percent(l); + return lex_percent(l) elseif c == '/' - return lex_forwardslash(l); - elseif c == '\\' - return lex_backslash(l); - elseif c == '.' - return lex_dot(l); - elseif c == '+' - return lex_plus(l); - elseif c == '-' - return lex_minus(l); - elseif c == '−' # \minus '−' treated as hyphen '-' - return emit(l, accept(l, '=') ? K"-=" : K"-") + return lex_forwardslash(l) + elseif c == '÷' + return lex_division(l) elseif c == '`' - return lex_backtick(l); - elseif is_identifier_start_char(c) - return lex_identifier(l, c) - elseif isdigit(c) - return lex_digit(l, K"Integer") + return lex_backtick(l) + elseif c == '^' + return lex_circumflex(l) + elseif c == '$' + return lex_dollar(l) + elseif c == '⊻' + return lex_xor(l) + elseif c == EOF_CHAR + return emit(l, K"EndMarker") elseif (k = get(_unicode_ops, c, K"error")) != K"error" return emit(l, k) else