3
3
# This is defined separately from parser.jl so that:
4
4
# * parser.jl doesn't need to refer to any tree data structures
5
5
# * It's clear which parts are the public API
6
- #
7
- # What should the general parsing API look like? Some points to consider:
8
- #
9
- # * After parsing atoms or statements or most other internal rules, it's
10
- # usual to start in the middle of the input text and end somewhere else in
11
- # the middle of the input text. So we should take an index for the start of
12
- # parsing and supply an index back to the caller after parsing.
13
- #
14
- # * `parseall` is a special case where we expect to consume all the input.
15
- # Perhaps this is the API which throws an error if we don't consume it all,
16
- # and doesn't accept an index as input?
17
- #
18
- # * The ParseStream is the fundamental interface which wraps the code string
19
- # and index up together for input and contains the output events, diagnostics
20
- # and current stream position after parsing. The user should potentially be
21
- # able to use this directly. It does, however, assume a Julia-compatible token
22
- # stream.
23
- #
24
- # * It could be useful to support an IO-based interface so that users can parse
25
- # Julia code intermixed with other DSLs. Documenter.jl and string macros come
26
- # to mind as examples which could use this. A tricky part is deciding where
27
- # the input ends: For string macros this is done by the parser, but for
28
- # Documenter it's probably just done beforehand according to the Markdown
29
- # code block rules.
30
- #
31
- # * The API should have an interface where a simple string is passed in. How
32
- # does SourceFile relate to this?
33
- #
34
- # * It's neat for `parse` to be overloadable to produce various output data
35
- # structures; GreenNode, SyntaxNode, Expr, (etc?) in the same way that
36
- # Base.parse can be used for non-Julia code. (Heh... though
37
- # `Base.parse(Expr, "...")` would also make a certain amount of sense.)
38
- #
39
- # * What does the no-copy API look like? A String can be put into an IOBuffer via
40
- # unsafe_wrap(Vector{UInt8}, str) ... A SubString likewise. Also there's the
41
- # `codeunits` function to hold a GC-safe view of string data as an array (but
42
- # we can't use a Vector{UInt8})
43
6
44
7
struct ParseError <: Exception
45
8
source:: SourceFile
@@ -65,39 +28,19 @@ Base.display_error(io::IO, err::ParseError, bt) = Base.showerror(io, err, bt)
65
28
66
29
67
30
"""
68
- # Input and output:
69
- stream = parse(stream::ParseStream; kws...)
70
- (tree, diagnostics) = parse(TreeType, io::IOBuffer; kws...)
71
- (tree, diagnostics, index) = parse(TreeType, str::AbstractString, [index::Integer]; kws...)
72
- # Keywords
73
- parse(...; rule=:toplevel, version=VERSION, ignore_trivia=true)
74
-
75
- Parse Julia source code from `input`, returning the output in a format
76
- compatible with `input`:
77
-
78
- * When `input` is a `ParseStream`, the stream itself is returned and the
79
- `ParseStream` interface can be used to process the output.
80
- * When `input` is a seekable `IO` subtype, the output is `(tree, diagnostics)`.
81
- The buffer `position` will be set to the next byte of input.
82
- * When `input` is an `AbstractString, Integer`, or `Vector{UInt8}, Integer` the
83
- output is `(tree, diagnostics, index)`, where `index` (default 1) is the next
84
- byte of input.
31
+ parse!(stream::ParseStream; rule=:toplevel)
32
+
33
+ Parse Julia source code from a [`ParseStream`](@ref) object. Output tree data
34
+ structures may be extracted from `stream` with the [`build_tree`](@ref) function.
85
35
86
36
`rule` may be any of
87
- * `toplevel` (default) — parse a whole "file" of top level statements. In this
37
+ * `:toplevel` (default) — parse a whole "file" of top level statements. In this
88
38
mode, the parser expects to fully consume the input.
89
- * `statement` — parse a single statement, or statements separated by semicolons.
90
- * `atom` — parse a single syntax "atom": a literal, identifier, or
39
+ * `:statement` — parse a single statement, or statements separated by semicolons.
40
+ * `:atom` — parse a single syntax "atom": a literal, identifier, or
91
41
parenthesized expression.
92
-
93
- `version` (default `VERSION`) may be used to set the syntax version to
94
- any Julia version `>= v"1.0"`. We aim to parse all Julia syntax which has been
95
- added after v"1.0", emitting an error if it's not compatible with the requested
96
- `version`.
97
-
98
- See also [`parseall`](@ref) for a simpler but less powerful interface.
99
42
"""
100
- function parse (stream:: ParseStream ; rule:: Symbol = :toplevel )
43
+ function parse! (stream:: ParseStream ; rule:: Symbol = :toplevel )
101
44
ps = ParseState (stream)
102
45
if rule === :toplevel
103
46
parse_toplevel (ps)
@@ -111,56 +54,37 @@ function parse(stream::ParseStream; rule::Symbol=:toplevel)
111
54
stream
112
55
end
113
56
114
- function parse (:: Type{T} , io:: IO ;
115
- rule:: Symbol = :toplevel , version= VERSION , kws... ) where {T}
57
+ """
58
+ parse!(TreeType, io::IO; rule=:toplevel, version=VERSION)
59
+
60
+ Parse Julia source code from a seekable `IO` object. The output is a tuple
61
+ `(tree, diagnostics)`. When `parse!` returns, the stream `io` is positioned
62
+ directly after the last byte which was consumed during parsing.
63
+ """
64
+ function parse! (:: Type{TreeType} , io:: IO ;
65
+ rule:: Symbol = :toplevel , version= VERSION , kws... ) where {TreeType}
116
66
# Wrap `io` in a ParseStream configured for the requested syntax `version`.
stream = ParseStream (io; version= version)
117
- parse (stream; rule= rule)
118
- tree = build_tree (T , stream; kws... )
67
# Run the parser for `rule`; results are accumulated inside `stream`.
+ parse! (stream; rule= rule)
68
# Build the requested tree type; any extra keywords `kws` are forwarded to `build_tree`.
+ tree = build_tree (TreeType , stream; kws... )
119
69
# Leave `io` positioned just past the last byte consumed during parsing.
seek (io, last_byte (stream))
120
70
# Return the tree together with the diagnostics collected on the stream.
tree, stream. diagnostics
121
71
end
122
72
123
- # Generic version of parse for all other cases where an index must be passed
124
- # back - ie strings and buffers
125
- function parse (:: Type{T} , input... ;
126
- rule:: Symbol = :toplevel , version= VERSION , kws... ) where {T}
127
- stream = ParseStream (input... ; version= version)
128
- parse (stream; rule= rule)
129
- tree = build_tree (T, stream; kws... )
130
- tree, stream. diagnostics, last_byte (stream) + 1
131
- end
132
-
133
-
134
- """
135
- parseall(TreeType, input...;
136
- rule=:toplevel,
137
- version=VERSION,
138
- ignore_trivia=true)
139
-
140
- Experimental convenience interface to parse `input` as Julia code, emitting an
141
- error if the entire input is not consumed. `input` can be a string or any other
142
- valid input to the `ParseStream` constructor. By default `parseall` will ignore
143
- whitespace and comments before and after valid code but you can turn this off
144
- by setting `ignore_trivia=false`.
145
-
146
- A `ParseError` will be thrown if any errors occurred during parsing.
147
-
148
- See [`parse`](@ref) for a more complete and powerful interface to the parser,
149
- as well as a description of the `version` and `rule` keywords.
150
- """
151
- function parseall (:: Type{T} , input... ; rule= :toplevel , version= VERSION ,
152
- ignore_trivia= true , filename= nothing ) where {T}
153
- stream = ParseStream (input... ; version= version)
73
+ function _parse (rule:: Symbol , need_eof:: Bool , :: Type{T} , text, index= 1 ; version= VERSION ,
74
+ ignore_trivia= true , filename= nothing , ignore_warnings= false ) where {T}
75
+ stream = ParseStream (text, index; version= version)
154
76
if ignore_trivia && rule != :toplevel
155
77
bump_trivia (stream, skip_newlines= true )
156
78
empty! (stream)
157
79
end
158
- parse (stream; rule= rule)
159
- if (ignore_trivia && peek (stream, skip_newlines= true ) != K " EndMarker" ) ||
160
- (! ignore_trivia && (peek (stream, skip_newlines= false , skip_whitespace= false ) != K " EndMarker" ))
161
- emit_diagnostic (stream, error= " unexpected text after parsing $rule " )
80
+ parse! (stream; rule= rule)
81
+ if need_eof
82
+ if (ignore_trivia && peek (stream, skip_newlines= true ) != K " EndMarker" ) ||
83
+ (! ignore_trivia && (peek (stream, skip_newlines= false , skip_whitespace= false ) != K " EndMarker" ))
84
+ emit_diagnostic (stream, error= " unexpected text after parsing $rule " )
85
+ end
162
86
end
163
- if any_error (stream. diagnostics)
87
+ if any_error (stream. diagnostics) || ( ! ignore_warnings && ! isempty (stream . diagnostics))
164
88
throw (ParseError (stream, filename= filename))
165
89
end
166
90
# TODO : Figure out a more satisfying solution to the wrap_toplevel_as_kind
@@ -169,13 +93,51 @@ function parseall(::Type{T}, input...; rule=:toplevel, version=VERSION,
169
93
# not absolute positions.
170
94
# * Dropping it would be ok for SyntaxNode and Expr...
171
95
tree = build_tree (T, stream; wrap_toplevel_as_kind= K " toplevel" , filename= filename)
172
- if ! isempty (stream. diagnostics)
173
- # Crudely format any warnings to the current logger.
174
- buf = IOBuffer ()
175
- show_diagnostics (IOContext (buf, stdout ), stream,
176
- SourceFile (sourcetext (stream, steal_textbuf= true ), filename= filename))
177
- @warn Text (String (take! (buf)))
178
- end
179
- tree
96
+ tree, last_byte (stream) + 1
180
97
end
181
98
99
+ """
100
+ parse(TreeType, text, [index];
101
+ version=VERSION,
102
+ ignore_trivia=true,
103
+ filename=nothing,
104
+ ignore_warnings=false)
105
+
106
+ # Or, with the same arguments
107
+ parseall(...)
108
+ parseatom(...)
109
+
110
+ Parse Julia source code string `text` into a data structure of type `TreeType`.
111
+ `parse` parses a single Julia statement, `parseall` parses top level statements
112
+ at file scope and `parseatom` parses a single Julia identifier or other "syntax
113
+ atom".
114
+
115
+ If `text` is passed without `index`, all the input text must be consumed and a
116
+ tree data structure is returned. When an integer byte `index` is passed, a
117
+ tuple `(tree, next_index)` will be returned containing the next index in `text`
118
+ to resume parsing. By default whitespace and comments before and after valid
119
+ code are ignored but you can turn this off by setting `ignore_trivia=false`.
120
+
121
+ `version` (default `VERSION`) may be used to set the syntax version to
122
+ any Julia version `>= v"1.0"`. We aim to parse all Julia syntax which has been
123
+ added after v"1.0", emitting an error if it's not compatible with the requested
124
+ `version`.
125
+
126
+ Pass `filename` to set any file name information embedded within the output
127
+ tree, if applicable. This will also annotate errors and warnings with the
128
+ source file name.
129
+
130
+ A `ParseError` will be thrown if any errors or warnings occurred during
131
+ parsing. To avoid exceptions due to warnings, use `ignore_warnings=true`.
132
+ """
133
+ parse (:: Type{T} , text:: AbstractString ; kws... ) where {T} = _parse (:statement , true , T, text; kws... )[1 ]
134
+ parseall (:: Type{T} , text:: AbstractString ; kws... ) where {T} = _parse (:toplevel , true , T, text; kws... )[1 ]
135
+ parseatom (:: Type{T} , text:: AbstractString ; kws... ) where {T} = _parse (:atom , true , T, text; kws... )[1 ]
# The three methods above take no `index`: they call `_parse` with `need_eof=true`
# and keep only the first element (the tree) of the tuple that `_parse` returns.
136
+
137
# Attach the docstring of `parse` to `parseall` and `parseatom` as well.
+ @eval @doc $ (@doc parse) parseall
138
+ @eval @doc $ (@doc parse) parseatom
139
+
140
# Methods taking a byte `index`: `_parse` is called with `need_eof=false`, and its
# whole result, the tuple `(tree, next_index)`, is returned to the caller.
+ parse (:: Type{T} , text:: AbstractString , index:: Integer ; kws... ) where {T} = _parse (:statement , false , T, text, index; kws... )
141
+ parseall (:: Type{T} , text:: AbstractString , index:: Integer ; kws... ) where {T} = _parse (:toplevel , false , T, text, index; kws... )
142
+ parseatom (:: Type{T} , text:: AbstractString , index:: Integer ; kws... ) where {T} = _parse (:atom , false , T, text, index; kws... )
143
+
0 commit comments