diff --git a/src/JuliaSyntax.jl b/src/JuliaSyntax.jl
index 3c276984..da5861c0 100644
--- a/src/JuliaSyntax.jl
+++ b/src/JuliaSyntax.jl
@@ -79,29 +79,30 @@ export SyntaxNode
 # Helper utilities
 include("utils.jl")
-include("kinds.jl")
+include("julia/kinds.jl")
 
 # Lexing uses a significantly modified version of Tokenize.jl
-include("tokenize.jl")
+include("julia/tokenize.jl")
 
 # Source and diagnostics
-include("source_files.jl")
-include("diagnostics.jl")
+include("core/source_files.jl")
+include("core/diagnostics.jl")
 
 # Parsing
-include("parse_stream.jl")
-include("parser.jl")
-include("parser_api.jl")
-include("literal_parsing.jl")
+include("core/parse_stream.jl")
+include("core/tree_cursors.jl")
+include("julia/julia_parse_stream.jl")
+include("julia/parser.jl")
+include("julia/parser_api.jl")
+include("julia/literal_parsing.jl")
 
 # Tree data structures
-include("tree_cursors.jl")
-include("green_node.jl")
-include("syntax_tree.jl")
-include("expr.jl")
+include("porcelain/green_node.jl")
+include("porcelain/syntax_tree.jl")
+include("integration/expr.jl")
 
 # Hooks to integrate the parser with Base
-include("hooks.jl")
+include("integration/hooks.jl")
 
 include("precompile.jl")
 
 end
diff --git a/src/diagnostics.jl b/src/core/diagnostics.jl
similarity index 100%
rename from src/diagnostics.jl
rename to src/core/diagnostics.jl
diff --git a/src/parse_stream.jl b/src/core/parse_stream.jl
similarity index 77%
rename from src/parse_stream.jl
rename to src/core/parse_stream.jl
index 1000fdaa..fd66b2b4 100644
--- a/src/parse_stream.jl
+++ b/src/core/parse_stream.jl
@@ -9,93 +9,11 @@ const EMPTY_FLAGS = RawFlags(0)
 # Set for tokens or ranges which are syntax trivia after parsing
 const TRIVIA_FLAG = RawFlags(1<<0)
 
-# Token flags - may be set for operator kinded tokens
-# Operator is dotted
-const DOTOP_FLAG = RawFlags(1<<1)
-# Operator has a suffix
-const SUFFIXED_FLAG = RawFlags(1<<2)
-
-# Set for K"call", K"dotcall" or any syntactic operator heads
-# Distinguish various syntaxes which are mapped to K"call"
-const PREFIX_CALL_FLAG = RawFlags(0<<3)
-const INFIX_FLAG = RawFlags(1<<3)
-const PREFIX_OP_FLAG = RawFlags(2<<3)
-const POSTFIX_OP_FLAG = RawFlags(3<<3)
-
-# The following flags are quite head-specific and may overlap
-
-"""
-Set when K"string" or K"cmdstring" was triple-delimited as with \"\"\" or ```
-"""
-const TRIPLE_STRING_FLAG = RawFlags(1<<5)
-
-"""
-Set when a K"string", K"cmdstring" or K"Identifier" needs raw string unescaping
-"""
-const RAW_STRING_FLAG = RawFlags(1<<6)
-
-"""
-Set for K"tuple", K"block" or K"macrocall" which are delimited by parentheses
-"""
-const PARENS_FLAG = RawFlags(1<<5)
-
-"""
-Set for various delimited constructs when they contains a trailing comma. For
-example, to distinguish `(a,b,)` vs `(a,b)`, and `f(a)` vs `f(a,)`. Kinds where
-this applies are: `tuple call dotcall macrocall vect curly braces <: >:`.
-"""
-const TRAILING_COMMA_FLAG = RawFlags(1<<6)
-
-"""
-Set for K"quote" for the short form `:x` as opposed to long form `quote x end`
-"""
-const COLON_QUOTE = RawFlags(1<<5)
-
-"""
-Set for K"toplevel" which is delimited by parentheses
-"""
-const TOPLEVEL_SEMICOLONS_FLAG = RawFlags(1<<5)
-
-"""
-Set for K"function" in short form definitions such as `f() = 1`
-"""
-const SHORT_FORM_FUNCTION_FLAG = RawFlags(1<<5)
-
-"""
-Set for K"struct" when mutable
-"""
-const MUTABLE_FLAG = RawFlags(1<<5)
-
-"""
-Set for K"module" when it's not bare (`module`, not `baremodule`)
-"""
-const BARE_MODULE_FLAG = RawFlags(1<<5)
-
 """
 Set for nodes that are non-terminals
 """
 const NON_TERMINAL_FLAG = RawFlags(1<<7)
 
-# Flags holding the dimension of an nrow or other UInt8 not held in the source
-# TODO: Given this is only used for nrow/ncat, we could actually use all the flags?
-const NUMERIC_FLAGS = RawFlags(RawFlags(0xff)<<8)
-
-function set_numeric_flags(n::Integer)
-    f = RawFlags((n << 8) & NUMERIC_FLAGS)
-    if numeric_flags(f) != n
-        error("Numeric flags unable to hold large integer $n")
-    end
-    f
-end
-
-function call_type_flags(f::RawFlags)
-    f & 0b11000
-end
-
-function numeric_flags(f::RawFlags)
-    Int((f >> 8) % UInt8)
-end
-
 function remove_flags(n::RawFlags, fs...)
     RawFlags(n & ~(RawFlags((|)(fs...))))
 end
@@ -138,47 +56,6 @@ function Base.summary(head::SyntaxHead)
     untokenize(head, unique=false, include_flag_suff=false)
 end
 
-function untokenize(head::SyntaxHead; unique=true, include_flag_suff=true)
-    str = (is_error(kind(head)) ? untokenize(kind(head); unique=false) :
-           untokenize(kind(head); unique=unique))::String
-    if is_dotted(head)
-        str = "."*str
-    end
-    if include_flag_suff
-        # Ignore DOTOP_FLAG - it's represented above with . prefix
-        is_trivia(head) && (str = str*"-t")
-        is_infix_op_call(head) && (str = str*"-i")
-        is_prefix_op_call(head) && (str = str*"-pre")
-        is_postfix_op_call(head) && (str = str*"-post")
-
-        k = kind(head)
-        if k in KSet"string cmdstring Identifier"
-            has_flags(head, TRIPLE_STRING_FLAG) && (str = str*"-s")
-            has_flags(head, RAW_STRING_FLAG) && (str = str*"-r")
-        elseif k in KSet"tuple block macrocall"
-            has_flags(head, PARENS_FLAG) && (str = str*"-p")
-        elseif k == K"quote"
-            has_flags(head, COLON_QUOTE) && (str = str*"-:")
-        elseif k == K"toplevel"
-            has_flags(head, TOPLEVEL_SEMICOLONS_FLAG) && (str = str*"-;")
-        elseif k == K"function"
-            has_flags(head, SHORT_FORM_FUNCTION_FLAG) && (str = str*"-=")
-        elseif k == K"struct"
-            has_flags(head, MUTABLE_FLAG) && (str = str*"-mut")
-        elseif k == K"module"
-            has_flags(head, BARE_MODULE_FLAG) && (str = str*"-bare")
-        end
-        if k in KSet"tuple call dotcall macrocall vect curly braces <: >:" &&
-                has_flags(head, TRAILING_COMMA_FLAG)
-            str *= "-,"
-        end
-        is_suffixed(head) && (str = str*"-suf")
-        n = numeric_flags(head)
-        n != 0 && (str = str*"-"*string(n))
-    end
-    str
-end
-
 #-------------------------------------------------------------------------------
 # Generic interface for types `T` which have kind and flags. Either:
 # 1. Define kind(::T) and flags(::T), or
@@ -200,65 +77,6 @@ invisible to the parser (eg, whitespace) or implied by the structure of the AST
 """
 is_trivia(x) = has_flags(x, TRIVIA_FLAG)
 
-"""
-    is_prefix_call(x)
-
-Return true for normal prefix function call syntax such as the `f` call node
-parsed from `f(x)`.
-"""
-is_prefix_call(x) = call_type_flags(x) == PREFIX_CALL_FLAG
-
-"""
-    is_infix_op_call(x)
-
-Return true for infix operator calls such as the `+` call node parsed from
-`x + y`.
-"""
-is_infix_op_call(x) = call_type_flags(x) == INFIX_FLAG
-
-"""
-    is_prefix_op_call(x)
-
-Return true for prefix operator calls such as the `+` call node parsed from `+x`.
-"""
-is_prefix_op_call(x) = call_type_flags(x) == PREFIX_OP_FLAG
-
-"""
-    is_postfix_op_call(x)
-
-Return true for postfix operator calls such as the `'ᵀ` call node parsed from `x'ᵀ`.
-"""
-is_postfix_op_call(x) = call_type_flags(x) == POSTFIX_OP_FLAG
-
-"""
-    is_dotted(x)
-
-Return true for dotted syntax tokens
-"""
-is_dotted(x) = has_flags(x, DOTOP_FLAG)
-
-"""
-    is_suffixed(x)
-
-Return true for operators which have suffixes, such as `+₁`
-"""
-is_suffixed(x) = has_flags(x, SUFFIXED_FLAG)
-
-"""
-    is_decorated(x)
-
-Return true for operators which are decorated with a dot or suffix.
-"""
-is_decorated(x) = is_dotted(x) || is_suffixed(x)
-
-"""
-    numeric_flags(x)
-
-Return the number attached to a `SyntaxHead`. This is only for kinds `K"nrow"`
-and `K"ncat"`, for now.
-"""
-numeric_flags(x) = numeric_flags(flags(x))
-
 #-------------------------------------------------------------------------------
 """
 `SyntaxToken` is a token covering a contiguous byte range in the input text.
@@ -280,7 +98,6 @@ function Base.show(io::IO, tok::SyntaxToken)
 end
 
 head(tok::SyntaxToken) = tok.head
-flags(tok::SyntaxToken) = remove_flags(flags(tok.head), NUMERIC_FLAGS)
 
 preceding_whitespace(tok::SyntaxToken) = tok.preceding_whitespace
 
@@ -962,45 +779,6 @@ function bump_glue(stream::ParseStream, kind, flags)
     return position(stream)
 end
 
-"""
-    bump_split(stream, token_spec1, [token_spec2 ...])
-
-Bump the next token, splitting it into several pieces
-
-Tokens are defined by a number of `token_spec` of shape `(nbyte, kind, flags)`.
-If all `nbyte` are positive, the sum must equal the token length. If one
-`nbyte` is negative, that token is given `tok_len + nbyte` bytes and the sum of
-all `nbyte` must equal zero.
-
-This is a hack which helps resolves the occasional lexing ambiguity. For
-example
-* Whether .+ should be a single token or the composite (. +) which is used for
-  standalone operators.
-* Whether ... is splatting (most of the time) or three . tokens in import paths
-
-TODO: Are these the only cases? Can we replace this general utility with a
-simpler one which only splits preceding dots?
-"""
-function bump_split(stream::ParseStream, split_spec::Vararg{Any, N}) where {N}
-    tok = stream.lookahead[stream.lookahead_index]
-    stream.lookahead_index += 1
-    start_b = _next_byte(stream)
-    toklen = tok.next_byte - start_b
-    prev_b = start_b
-    for (i, (nbyte, k, f)) in enumerate(split_spec)
-        h = SyntaxHead(k, f)
-        actual_nbyte = nbyte < 0 ? (toklen + nbyte) : nbyte
-        orig_k = k == K"." ? K"." : kind(tok)
-        node = RawGreenNode(h, actual_nbyte, orig_k)
-        push!(stream.output, node)
-        prev_b += actual_nbyte
-        stream.next_byte += actual_nbyte
-    end
-    @assert tok.next_byte == prev_b
-    stream.peek_count = 0
-    return position(stream)
-end
-
 """
 Reset kind or flags of an existing node in the output stream
 
@@ -1129,98 +907,6 @@ function emit_diagnostic(diagnostics::AbstractVector{Diagnostic},
     push!(diagnostics, Diagnostic(first(byterange), last(byterange); kws...))
 end
 
-#-------------------------------------------------------------------------------
-# ParseStream Post-processing
-
-function validate_tokens(stream::ParseStream)
-    txtbuf = unsafe_textbuf(stream)
-    charbuf = IOBuffer()
-
-    # Process terminal nodes in the output
-    fbyte = stream.output[1].byte_span+1 # Start after sentinel
-    for i = 2:length(stream.output)
-        node = stream.output[i]
-        if !is_terminal(node) || kind(node) == K"TOMBSTONE"
-            continue
-        end
-
-        k = kind(node)
-        nbyte = fbyte + node.byte_span
-        tokrange = fbyte:nbyte-1
-        error_kind = K"None"
-
-        if k in KSet"Integer BinInt OctInt HexInt"
-            # The following shouldn't be able to error...
-            # parse_int_literal
-            # parse_uint_literal
-        elseif k == K"Float" || k == K"Float32"
-            underflow0 = false
-            if k == K"Float"
-                x, code = parse_float_literal(Float64, txtbuf, fbyte, nbyte)
-                # jl_strtod_c can return "underflow" even for valid cases such
-                # as `5e-324` where the source is an exact representation of
-                # `x`. So only warn when underflowing to zero.
-                underflow0 = code === :underflow && x == 0
-            else
-                x, code = parse_float_literal(Float32, txtbuf, fbyte, nbyte)
-                underflow0 = code === :underflow && x == 0
-            end
-            if code === :ok
-                # pass
-            elseif code === :overflow
-                emit_diagnostic(stream, tokrange,
-                                error="overflow in floating point literal")
-                error_kind = K"ErrorNumericOverflow"
-            elseif underflow0
-                emit_diagnostic(stream, tokrange,
-                                warning="underflow to zero in floating point literal")
-            end
-        elseif k == K"Char"
-            @assert fbyte < nbyte # Already handled in the parser
-            truncate(charbuf, 0)
-            had_error = unescape_julia_string(charbuf, txtbuf, fbyte,
-                                              nbyte, stream.diagnostics)
-            if had_error
-                error_kind = K"ErrorInvalidEscapeSequence"
-            else
-                seek(charbuf,0)
-                read(charbuf, Char)
-                if !eof(charbuf)
-                    error_kind = K"ErrorOverLongCharacter"
-                    emit_diagnostic(stream, tokrange,
-                                    error="character literal contains multiple characters")
-                end
-            end
-        elseif k == K"String" && !has_flags(node, RAW_STRING_FLAG)
-            had_error = unescape_julia_string(devnull, txtbuf, fbyte,
-                                              nbyte, stream.diagnostics)
-            if had_error
-                error_kind = K"ErrorInvalidEscapeSequence"
-            end
-        elseif is_error(k) && k != K"error"
-            # Emit messages for non-generic token errors
-            tokstr = String(txtbuf[tokrange])
-            msg = if k in KSet"ErrorInvisibleChar ErrorUnknownCharacter ErrorIdentifierStart"
-                "$(_token_error_descriptions[k]) $(repr(tokstr[1]))"
-            elseif k in KSet"ErrorInvalidUTF8 ErrorBidiFormatting"
-                "$(_token_error_descriptions[k]) $(repr(tokstr))"
-            else
-                _token_error_descriptions[k]
-            end
-            emit_diagnostic(stream, tokrange, error=msg)
-        end
-
-        if error_kind != K"None"
-            # Update the node with new error kind
-            stream.output[i] = RawGreenNode(SyntaxHead(error_kind, EMPTY_FLAGS),
-                                            node.byte_span, node.orig_kind)
-        end
-
-        fbyte = nbyte
-    end
-    sort!(stream.diagnostics, by=first_byte)
-end
-
 # Tree construction from the list of text ranges held by ParseStream
 
 # API for extracting results from ParseStream
diff --git a/src/source_files.jl b/src/core/source_files.jl
similarity index 100%
rename from src/source_files.jl
rename to src/core/source_files.jl
diff --git a/src/tree_cursors.jl b/src/core/tree_cursors.jl
similarity index 100%
rename from src/tree_cursors.jl
rename to src/core/tree_cursors.jl
diff --git a/src/expr.jl b/src/integration/expr.jl
similarity index 100%
rename from src/expr.jl
rename to src/integration/expr.jl
diff --git a/src/hooks.jl b/src/integration/hooks.jl
similarity index 100%
rename from src/hooks.jl
rename to src/integration/hooks.jl
diff --git a/src/julia/julia_parse_stream.jl b/src/julia/julia_parse_stream.jl
new file mode 100644
index 00000000..aab8a547
--- /dev/null
+++ b/src/julia/julia_parse_stream.jl
@@ -0,0 +1,315 @@
+# Token flags - may be set for operator kinded tokens
+# Operator is dotted
+const DOTOP_FLAG = RawFlags(1<<1)
+# Operator has a suffix
+const SUFFIXED_FLAG = RawFlags(1<<2)
+
+# Set for K"call", K"dotcall" or any syntactic operator heads
+# Distinguish various syntaxes which are mapped to K"call"
+const PREFIX_CALL_FLAG = RawFlags(0<<3)
+const INFIX_FLAG = RawFlags(1<<3)
+const PREFIX_OP_FLAG = RawFlags(2<<3)
+const POSTFIX_OP_FLAG = RawFlags(3<<3)
+
+# The following flags are quite head-specific and may overlap
+
+"""
+Set when K"string" or K"cmdstring" was triple-delimited as with \"\"\" or ```
+"""
+const TRIPLE_STRING_FLAG = RawFlags(1<<5)
+
+"""
+Set when a K"string", K"cmdstring" or K"Identifier" needs raw string unescaping
+"""
+const RAW_STRING_FLAG = RawFlags(1<<6)
+
+"""
+Set for K"tuple", K"block" or K"macrocall" which are delimited by parentheses
+"""
+const PARENS_FLAG = RawFlags(1<<5)
+
+"""
+Set for various delimited constructs when they contain a trailing comma. For
+example, to distinguish `(a,b,)` vs `(a,b)`, and `f(a)` vs `f(a,)`. Kinds where
+this applies are: `tuple call dotcall macrocall vect curly braces <: >:`.
+"""
+const TRAILING_COMMA_FLAG = RawFlags(1<<6)
+
+"""
+Set for K"quote" for the short form `:x` as opposed to long form `quote x end`
+"""
+const COLON_QUOTE = RawFlags(1<<5)
+
+"""
+Set for K"toplevel" which is delimited by parentheses
+"""
+const TOPLEVEL_SEMICOLONS_FLAG = RawFlags(1<<5)
+
+"""
+Set for K"function" in short form definitions such as `f() = 1`
+"""
+const SHORT_FORM_FUNCTION_FLAG = RawFlags(1<<5)
+
+"""
+Set for K"struct" when mutable
+"""
+const MUTABLE_FLAG = RawFlags(1<<5)
+
+"""
+Set for K"module" when it's not bare (`module`, not `baremodule`)
+"""
+const BARE_MODULE_FLAG = RawFlags(1<<5)
+
+# Flags holding the dimension of an nrow or other UInt8 not held in the source
+# TODO: Given this is only used for nrow/ncat, we could actually use all the flags?
+const NUMERIC_FLAGS = RawFlags(RawFlags(0xff)<<8)
+
+function set_numeric_flags(n::Integer)
+    f = RawFlags((n << 8) & NUMERIC_FLAGS)
+    if numeric_flags(f) != n
+        error("Numeric flags unable to hold large integer $n")
+    end
+    f
+end
+
+function call_type_flags(f::RawFlags)
+    f & 0b11000
+end
+
+function numeric_flags(f::RawFlags)
+    Int((f >> 8) % UInt8)
+end
+
+flags(tok::SyntaxToken) = remove_flags(flags(tok.head), NUMERIC_FLAGS)
+
+"""
+    is_prefix_call(x)
+
+Return true for normal prefix function call syntax such as the `f` call node
+parsed from `f(x)`.
+"""
+is_prefix_call(x) = call_type_flags(x) == PREFIX_CALL_FLAG
+
+"""
+    is_infix_op_call(x)
+
+Return true for infix operator calls such as the `+` call node parsed from
+`x + y`.
+"""
+is_infix_op_call(x) = call_type_flags(x) == INFIX_FLAG
+
+"""
+    is_prefix_op_call(x)
+
+Return true for prefix operator calls such as the `+` call node parsed from `+x`.
+"""
+is_prefix_op_call(x) = call_type_flags(x) == PREFIX_OP_FLAG
+
+"""
+    is_postfix_op_call(x)
+
+Return true for postfix operator calls such as the `'ᵀ` call node parsed from `x'ᵀ`.
+"""
+is_postfix_op_call(x) = call_type_flags(x) == POSTFIX_OP_FLAG
+
+"""
+    is_dotted(x)
+
+Return true for dotted syntax tokens
+"""
+is_dotted(x) = has_flags(x, DOTOP_FLAG)
+
+"""
+    is_suffixed(x)
+
+Return true for operators which have suffixes, such as `+₁`
+"""
+is_suffixed(x) = has_flags(x, SUFFIXED_FLAG)
+
+"""
+    is_decorated(x)
+
+Return true for operators which are decorated with a dot or suffix.
+"""
+is_decorated(x) = is_dotted(x) || is_suffixed(x)
+
+"""
+    numeric_flags(x)
+
+Return the number attached to a `SyntaxHead`. This is only for kinds `K"nrow"`
+and `K"ncat"`, for now.
+"""
+numeric_flags(x) = numeric_flags(flags(x))
+
+function untokenize(head::SyntaxHead; unique=true, include_flag_suff=true)
+    str = (is_error(kind(head)) ? untokenize(kind(head); unique=false) :
+           untokenize(kind(head); unique=unique))::String
+    if is_dotted(head)
+        str = "."*str
+    end
+    if include_flag_suff
+        # Ignore DOTOP_FLAG - it's represented above with . prefix
+        is_trivia(head) && (str = str*"-t")
+        is_infix_op_call(head) && (str = str*"-i")
+        is_prefix_op_call(head) && (str = str*"-pre")
+        is_postfix_op_call(head) && (str = str*"-post")
+
+        k = kind(head)
+        if k in KSet"string cmdstring Identifier"
+            has_flags(head, TRIPLE_STRING_FLAG) && (str = str*"-s")
+            has_flags(head, RAW_STRING_FLAG) && (str = str*"-r")
+        elseif k in KSet"tuple block macrocall"
+            has_flags(head, PARENS_FLAG) && (str = str*"-p")
+        elseif k == K"quote"
+            has_flags(head, COLON_QUOTE) && (str = str*"-:")
+        elseif k == K"toplevel"
+            has_flags(head, TOPLEVEL_SEMICOLONS_FLAG) && (str = str*"-;")
+        elseif k == K"function"
+            has_flags(head, SHORT_FORM_FUNCTION_FLAG) && (str = str*"-=")
+        elseif k == K"struct"
+            has_flags(head, MUTABLE_FLAG) && (str = str*"-mut")
+        elseif k == K"module"
+            has_flags(head, BARE_MODULE_FLAG) && (str = str*"-bare")
+        end
+        if k in KSet"tuple call dotcall macrocall vect curly braces <: >:" &&
+                has_flags(head, TRAILING_COMMA_FLAG)
+            str *= "-,"
+        end
+        is_suffixed(head) && (str = str*"-suf")
+        n = numeric_flags(head)
+        n != 0 && (str = str*"-"*string(n))
+    end
+    str
+end
+
+
+#-------------------------------------------------------------------------------
+# ParseStream Post-processing
+
+function validate_tokens(stream::ParseStream)
+    txtbuf = unsafe_textbuf(stream)
+    charbuf = IOBuffer()
+
+    # Process terminal nodes in the output
+    fbyte = stream.output[1].byte_span+1 # Start after sentinel
+    for i = 2:length(stream.output)
+        node = stream.output[i]
+        if !is_terminal(node) || kind(node) == K"TOMBSTONE"
+            continue
+        end
+
+        k = kind(node)
+        nbyte = fbyte + node.byte_span
+        tokrange = fbyte:nbyte-1
+        error_kind = K"None"
+
+        if k in KSet"Integer BinInt OctInt HexInt"
+            # The following shouldn't be able to error...
+            # parse_int_literal
+            # parse_uint_literal
+        elseif k == K"Float" || k == K"Float32"
+            underflow0 = false
+            if k == K"Float"
+                x, code = parse_float_literal(Float64, txtbuf, fbyte, nbyte)
+                # jl_strtod_c can return "underflow" even for valid cases such
+                # as `5e-324` where the source is an exact representation of
+                # `x`. So only warn when underflowing to zero.
+                underflow0 = code === :underflow && x == 0
+            else
+                x, code = parse_float_literal(Float32, txtbuf, fbyte, nbyte)
+                underflow0 = code === :underflow && x == 0
+            end
+            if code === :ok
+                # pass
+            elseif code === :overflow
+                emit_diagnostic(stream, tokrange,
+                                error="overflow in floating point literal")
+                error_kind = K"ErrorNumericOverflow"
+            elseif underflow0
+                emit_diagnostic(stream, tokrange,
+                                warning="underflow to zero in floating point literal")
+            end
+        elseif k == K"Char"
+            @assert fbyte < nbyte # Already handled in the parser
+            truncate(charbuf, 0)
+            had_error = unescape_julia_string(charbuf, txtbuf, fbyte,
+                                              nbyte, stream.diagnostics)
+            if had_error
+                error_kind = K"ErrorInvalidEscapeSequence"
+            else
+                seek(charbuf,0)
+                read(charbuf, Char)
+                if !eof(charbuf)
+                    error_kind = K"ErrorOverLongCharacter"
+                    emit_diagnostic(stream, tokrange,
+                                    error="character literal contains multiple characters")
+                end
+            end
+        elseif k == K"String" && !has_flags(node, RAW_STRING_FLAG)
+            had_error = unescape_julia_string(devnull, txtbuf, fbyte,
+                                              nbyte, stream.diagnostics)
+            if had_error
+                error_kind = K"ErrorInvalidEscapeSequence"
+            end
+        elseif is_error(k) && k != K"error"
+            # Emit messages for non-generic token errors
+            tokstr = String(txtbuf[tokrange])
+            msg = if k in KSet"ErrorInvisibleChar ErrorUnknownCharacter ErrorIdentifierStart"
+                "$(_token_error_descriptions[k]) $(repr(tokstr[1]))"
+            elseif k in KSet"ErrorInvalidUTF8 ErrorBidiFormatting"
+                "$(_token_error_descriptions[k]) $(repr(tokstr))"
+            else
+                _token_error_descriptions[k]
+            end
+            emit_diagnostic(stream, tokrange, error=msg)
+        end
+
+        if error_kind != K"None"
+            # Update the node with new error kind
+            stream.output[i] = RawGreenNode(SyntaxHead(error_kind, EMPTY_FLAGS),
+                                            node.byte_span, node.orig_kind)
+        end
+
+        fbyte = nbyte
+    end
+    sort!(stream.diagnostics, by=first_byte)
+end
+
+"""
+    bump_split(stream, token_spec1, [token_spec2 ...])
+
+Bump the next token, splitting it into several pieces
+
+Tokens are defined by a number of `token_spec` of shape `(nbyte, kind, flags)`.
+If all `nbyte` are positive, the sum must equal the token length. If one
+`nbyte` is negative, that token is given `tok_len + nbyte` bytes and the sum of
+all `nbyte` must equal zero.
+
+This is a hack which helps resolve the occasional lexing ambiguity. For
+example
+* Whether .+ should be a single token or the composite (. +) which is used for
+  standalone operators.
+* Whether ... is splatting (most of the time) or three . tokens in import paths
+
+TODO: Are these the only cases? Can we replace this general utility with a
+simpler one which only splits preceding dots?
+"""
+function bump_split(stream::ParseStream, split_spec::Vararg{Any, N}) where {N}
+    tok = stream.lookahead[stream.lookahead_index]
+    stream.lookahead_index += 1
+    start_b = _next_byte(stream)
+    toklen = tok.next_byte - start_b
+    prev_b = start_b
+    for (i, (nbyte, k, f)) in enumerate(split_spec)
+        h = SyntaxHead(k, f)
+        actual_nbyte = nbyte < 0 ? (toklen + nbyte) : nbyte
+        orig_k = k == K"." ? K"." : kind(tok)
+        node = RawGreenNode(h, actual_nbyte, orig_k)
+        push!(stream.output, node)
+        prev_b += actual_nbyte
+        stream.next_byte += actual_nbyte
+    end
+    @assert tok.next_byte == prev_b
+    stream.peek_count = 0
+    return position(stream)
+end
diff --git a/src/kinds.jl b/src/julia/kinds.jl
similarity index 100%
rename from src/kinds.jl
rename to src/julia/kinds.jl
diff --git a/src/literal_parsing.jl b/src/julia/literal_parsing.jl
similarity index 100%
rename from src/literal_parsing.jl
rename to src/julia/literal_parsing.jl
diff --git a/src/parser.jl b/src/julia/parser.jl
similarity index 100%
rename from src/parser.jl
rename to src/julia/parser.jl
diff --git a/src/parser_api.jl b/src/julia/parser_api.jl
similarity index 100%
rename from src/parser_api.jl
rename to src/julia/parser_api.jl
diff --git a/src/tokenize.jl b/src/julia/tokenize.jl
similarity index 100%
rename from src/tokenize.jl
rename to src/julia/tokenize.jl
diff --git a/src/green_node.jl b/src/porcelain/green_node.jl
similarity index 100%
rename from src/green_node.jl
rename to src/porcelain/green_node.jl
diff --git a/src/syntax_tree.jl b/src/porcelain/syntax_tree.jl
similarity index 100%
rename from src/syntax_tree.jl
rename to src/porcelain/syntax_tree.jl
diff --git a/src/precompile.jl b/src/precompile.jl
index 5a80d92d..9fb71c74 100644
--- a/src/precompile.jl
+++ b/src/precompile.jl
@@ -1,5 +1,5 @@
 # Just parse some file as a precompile workload
-let filename = joinpath(@__DIR__, "literal_parsing.jl")
+let filename = joinpath(@__DIR__, "julia/literal_parsing.jl")
     text = read(filename, String)
     parseall(Expr, text)
     parseall(SyntaxNode, text)