@@ -1,12 +1,61 @@
-module Lexers
+module Tokenize
 
-import ..Tokens
-import ..Tokens: @K_str, Token, Kind, UNICODE_OPS, EMPTY_TOKEN,
-    isliteral, iserror, iscontextualkeyword, iswordoperator
+export tokenize, untokenize, Tokens
+
+using ..JuliaSyntax: Kind, @K_str
+
+import ..JuliaSyntax: kind,
+    is_literal, is_error, is_contextual_keyword, is_word_operator
+
+import Base.eof
 
 include("utilities.jl")
 
-export tokenize
+#-------------------------------------------------------------------------------
+# Tokens
+
+# Error kind => description
+const TOKEN_ERROR_DESCRIPTION = Dict{Kind, String}(
+    K"ErrorEofMultiComment" => "unterminated multi-line comment #= ... =#",
+    K"ErrorEofChar" => "unterminated character literal",
+    K"ErrorInvalidNumericConstant" => "invalid numeric constant",
+    K"ErrorInvalidOperator" => "invalid operator",
+    K"ErrorInvalidInterpolationTerminator" => "interpolated variable ends with invalid character; use `\$(...)` instead",
+    K"error" => "unknown error",
+)
+
+struct Token
+    kind::Kind
+    # Offsets into a string or buffer
+    startbyte::Int # The byte where the token starts in the buffer
+    endbyte::Int   # The byte where the token ends in the buffer
+    dotop::Bool
+    suffix::Bool
+end
+function Token(kind::Kind, startbyte::Int, endbyte::Int)
+    Token(kind, startbyte, endbyte, false, false)
+end
+Token() = Token(K"error", 0, 0, false, false)
+
+const EMPTY_TOKEN = Token()
+
+kind(t::Token) = t.kind
+
+startbyte(t::Token) = t.startbyte
+endbyte(t::Token) = t.endbyte
+
+
+function untokenize(t::Token, str::String)
+    String(codeunits(str)[1 .+ (t.startbyte:t.endbyte)])
+end
+
+function Base.show(io::IO, t::Token)
+    print(io, rpad(string(startbyte(t), "-", endbyte(t)), 11, " "))
+    print(io, rpad(kind(t), 15, " "))
+end
+
+#-------------------------------------------------------------------------------
+# Lexer
 
 @inline ishex(c::Char) = isdigit(c) || ('a' <= c <= 'f') || ('A' <= c <= 'F')
 @inline isbinary(c::Char) = c == '0' || c == '1'
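The new `Token` stores 0-based, inclusive byte offsets, which is why `untokenize` adds one before indexing `codeunits`. A minimal round-trip sketch (the string and offsets below are hypothetical, not part of this commit; it relies on the module's `@K_str` import):

```julia
str = "x + 42"
# Hypothetical token covering 0-based bytes 4:5, i.e. the substring "42".
t = Token(K"Integer", 4, 5)
# untokenize slices codeunits(str)[1 .+ (4:5)] and rebuilds the text.
@assert untokenize(t, str) == "42"
```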
@@ -266,7 +315,7 @@ Returns an `K"error"` token with error `err` and starts a new `Token`.
 """
 function emit_error(l::Lexer, err::Kind = K"error")
     l.errored = true
-    @assert iserror(err)
+    @assert is_error(err)
     return emit(l, err)
 end
 
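`emit_error` only records an error *kind* on the token; turning that kind into a message is left to consumers via the `TOKEN_ERROR_DESCRIPTION` table added above. A hypothetical helper (not part of this commit) might look like:

```julia
# Look up a human-readable message for an error token, falling back
# to the generic "unknown error" entry for unlisted kinds.
error_message(t::Token) =
    get(TOKEN_ERROR_DESCRIPTION, kind(t), TOKEN_ERROR_DESCRIPTION[K"error"])
```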
@@ -838,14 +887,14 @@ end
 
 function lex_prime(l, doemit = true)
     if l.last_token == K"Identifier" ||
-        iscontextualkeyword(l.last_token) ||
-        iswordoperator(l.last_token) ||
+        is_contextual_keyword(l.last_token) ||
+        is_word_operator(l.last_token) ||
         l.last_token == K"." ||
         l.last_token == K")" ||
         l.last_token == K"]" ||
         l.last_token == K"}" ||
         l.last_token == K"'" ||
-        l.last_token == K"end" || isliteral(l.last_token)
+        l.last_token == K"end" || is_literal(l.last_token)
         return emit(l, K"'")
     else
         if accept(l, '\'')
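This predicate disambiguates the two meanings of `'`: after an identifier, closing bracket, literal, `end`, or another `'`, it is the postfix adjoint operator; otherwise it starts a character literal. Illustrative inputs (assumed examples, not from the diff):

```julia
# Adjoint: ' directly follows an identifier, ), ], }, or a literal
#   A'    f(x)'    v[1]'    2'
# Character literal: ' follows an operator, `=`, `(`, etc.
#   x = 'a'    ('b', 'c')
```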
@@ -888,8 +937,8 @@
 # A '"' has been consumed
 function lex_quote(l::Lexer)
     raw = l.last_token == K"Identifier" ||
-          iscontextualkeyword(l.last_token) ||
-          iswordoperator(l.last_token)
+          is_contextual_keyword(l.last_token) ||
+          is_word_operator(l.last_token)
     pc, dpc = dpeekchar(l)
     triplestr = pc == '"' && dpc == '"'
     push!(l.string_states, StringState(triplestr, raw, '"', 0))
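Here `raw` is true exactly when the `"` directly follows an identifier-like token, i.e. when it opens the body of a macro string literal whose escapes must pass through untouched, and `triplestr` is set by peeking two characters ahead for `"""`. Illustrative cases (assumed examples, not from the diff):

```julia
#   r"\d+"      # raw: backslash reaches the @r_str macro unmodified
#   "a\nb"      # not raw: \n is an ordinary string escape
#   """x"""     # triplestr: pc == dpc == '"' after the opening quote
```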