Skip to content

Commit 73766d7

Browse files
authored
API updates: parse() -> parsestmt() + exports + parse!(rule=:all)
`JuliaSyntax.parse()` clashes with `Base.parse()` and it's really not clear whether `parse()` should parse a single statement or a whole file top-level. Having a less generic name `parsestmt()` helps with this. Using the name `parsestmt()` is explicit about the fact that this parses a single statement (not an "expression" which is ambiguous given every Julia construct is an expression). It's also consistent with the naming rules of `parseall()` and `parseatom()`, which already appear in Base as `Meta.parseall` and `Meta.parseatom`. Change to using `rule=:all` in the `parse!()` API rather than `rule=:toplevel` because this is most consistent with the choices made in the `Core._parser` interface and the naming of `parseall()`. Also add a conservative list of exports that I expect "people are likely to use", and which seem to be required as the main part of the API. There's more to the API than this, but shoving that all into the user's namespace doesn't seem ideal. Especially the parts which are less certain.
1 parent 419d173 commit 73766d7

File tree

10 files changed

+202
-161
lines changed

10 files changed

+202
-161
lines changed

README.md

Lines changed: 27 additions & 24 deletions
Original file line numberDiff line numberDiff line change
@@ -50,15 +50,16 @@ First, a source-ordered AST with `SyntaxNode` (`call-i` in the dump here means
5050
the `call` has the infix `-i` flag):
5151

5252
```julia
53-
julia> using JuliaSyntax: JuliaSyntax, SyntaxNode, GreenNode
53+
julia> using JuliaSyntax
5454

55-
julia> JuliaSyntax.parse(SyntaxNode, "(x + y)*z", filename="foo.jl")
55+
julia> parsestmt(SyntaxNode, "(x + y)*z", filename="foo.jl")
5656
line:col│ tree │ file_name
5757
1:1 │[call-i] │foo.jl
58-
1:2 │ [call-i]
59-
1:2 │ x
60-
1:4+
61-
1:6 │ y
58+
1:1 │ [parens]
59+
1:2 │ [call-i]
60+
1:2 │ x
61+
1:4+
62+
1:6 │ y
6263
1:8*
6364
1:9 │ z
6465
```
@@ -71,16 +72,17 @@ representation, despite being important for parsing.
7172

7273
```julia
7374
julia> text = "(x + y)*z"
74-
greentree = JuliaSyntax.parse(GreenNode, text)
75+
greentree = parsestmt(JuliaSyntax.GreenNode, text)
7576
1:9 │[call]
76-
1:1 │ (
77-
2:6 │ [call]
78-
2:2 │ Identifier ✔
79-
3:3 │ Whitespace
80-
4:4+
81-
5:5 │ Whitespace
82-
6:6 │ Identifier ✔
83-
7:7 │ )
77+
1:7 │ [parens]
78+
1:1 │ (
79+
2:6 │ [call]
80+
2:2 │ Identifier ✔
81+
3:3 │ Whitespace
82+
4:4+
83+
5:5 │ Whitespace
84+
6:6 │ Identifier ✔
85+
7:7 │ )
8486
8:8*
8587
9:9 │ Identifier ✔
8688
```
@@ -91,22 +93,23 @@ supplying the source text string:
9193
```julia
9294
julia> show(stdout, MIME"text/plain"(), greentree, text)
9395
1:9 │[call]
94-
1:1 │ ( "("
95-
2:6 │ [call]
96-
2:2 │ Identifier ✔ "x"
97-
3:3 │ Whitespace " "
98-
4:4+"+"
99-
5:5 │ Whitespace " "
100-
6:6 │ Identifier ✔ "y"
101-
7:7 │ ) ")"
96+
1:7 │ [parens]
97+
1:1 │ ( "("
98+
2:6 │ [call]
99+
2:2 │ Identifier ✔ "x"
100+
3:3 │ Whitespace " "
101+
4:4+"+"
102+
5:5 │ Whitespace " "
103+
6:6 │ Identifier ✔ "y"
104+
7:7 │ ) ")"
102105
8:8*"*"
103106
9:9 │ Identifier ✔ "z"
104107
```
105108

106109
Julia `Expr` can also be produced:
107110

108111
```julia
109-
julia> JuliaSyntax.parse(Expr, "(x + y)*z")
112+
julia> parsestmt(Expr, "(x + y)*z")
110113
:((x + y) * z)
111114
```
112115

src/JuliaSyntax.jl

Lines changed: 19 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -1,5 +1,23 @@
11
module JuliaSyntax
22

3+
# Conservative list of exports - only export the most common/useful things
4+
# here.
5+
6+
# Parsing. See also
7+
# parse!(), ParseStream
8+
export parsestmt, parseall, parseatom
9+
# Tokenization
10+
export tokenize, Token, untokenize
11+
# Source file handling. See also
12+
# highlight() sourcetext() source_line() source_location()
13+
export SourceFile
14+
# Expression heads/kinds. See also
15+
# flags() and related predicates.
16+
export @K_str, kind, head
17+
# Syntax tree types. See also
18+
# GreenNode
19+
export SyntaxNode
20+
321
# Helper utilities
422
include("utils.jl")
523

@@ -26,4 +44,5 @@ include("expr.jl")
2644
# Hooks to integrate the parser with Base
2745
include("hooks.jl")
2846
include("precompile.jl")
47+
2948
end

src/hooks.jl

Lines changed: 5 additions & 6 deletions
Original file line numberDiff line numberDiff line change
@@ -122,7 +122,7 @@ end
122122
# Debug log file for dumping parsed code
123123
const _debug_log = Ref{Union{Nothing,IO}}(nothing)
124124

125-
function _core_parser_hook(code, filename, lineno, offset, options)
125+
function _core_parser_hook(code, filename::String, lineno::Int, offset::Int, options::Symbol)
126126
try
127127
# TODO: Check that we do all this input wrangling without copying the
128128
# code buffer
@@ -144,8 +144,7 @@ function _core_parser_hook(code, filename, lineno, offset, options)
144144
seek(io, offset)
145145

146146
stream = ParseStream(io)
147-
rule = options === :all ? :toplevel : options
148-
if rule === :statement || rule === :atom
147+
if options === :statement || options === :atom
149148
# To copy the flisp parser driver:
150149
# * Parsing atoms consumes leading trivia
151150
# * Parsing statements consumes leading+trailing trivia
@@ -157,8 +156,8 @@ function _core_parser_hook(code, filename, lineno, offset, options)
157156
return Core.svec(nothing, last_byte(stream))
158157
end
159158
end
160-
parse!(stream; rule=rule)
161-
if rule === :statement
159+
parse!(stream; rule=options)
160+
if options === :statement
162161
bump_trivia(stream)
163162
end
164163

@@ -342,7 +341,7 @@ function _fl_parse_string(text::AbstractString, filename::AbstractString,
342341
ex, offset+1
343342
end
344343

345-
# Convenience functions to mirror `JuliaSyntax.parse(Expr, ...)` in simple cases.
344+
# Convenience functions to mirror `JuliaSyntax.parsestmt(Expr, ...)` in simple cases.
346345
fl_parse(::Type{Expr}, args...; kws...) = fl_parse(args...; kws...)
347346
fl_parseall(::Type{Expr}, args...; kws...) = fl_parseall(args...; kws...)
348347

src/parser_api.jl

Lines changed: 35 additions & 25 deletions
Original file line numberDiff line numberDiff line change
@@ -28,21 +28,25 @@ Base.display_error(io::IO, err::ParseError, bt) = Base.showerror(io, err, bt)
2828

2929

3030
"""
31-
parse!(stream::ParseStream; rule=:toplevel)
31+
parse!(stream::ParseStream; rule=:all)
3232
3333
Parse Julia source code from a [`ParseStream`](@ref) object. Output tree data
3434
structures may be extracted from `stream` with the [`build_tree`](@ref) function.
3535
3636
`rule` may be any of
37-
* `:toplevel` (default) — parse a whole "file" of top level statements. In this
37+
* `:all` (default) — parse a whole "file" of top level statements. In this
3838
mode, the parser expects to fully consume the input.
3939
* `:statement` — parse a single statement, or statements separated by semicolons.
4040
* `:atom` — parse a single syntax "atom": a literal, identifier, or
4141
parenthesized expression.
4242
"""
43-
function parse!(stream::ParseStream; rule::Symbol=:toplevel)
43+
function parse!(stream::ParseStream; rule::Symbol=:all)
44+
if rule == :toplevel
45+
Base.depwarn("Use of rule == :toplevel in parse!() is deprecated. use `rule=:all` instead.", :parse!)
46+
rule = :all
47+
end
4448
ps = ParseState(stream)
45-
if rule === :toplevel
49+
if rule === :all
4650
parse_toplevel(ps)
4751
elseif rule === :statement
4852
parse_stmts(ps)
@@ -56,14 +60,14 @@ function parse!(stream::ParseStream; rule::Symbol=:toplevel)
5660
end
5761

5862
"""
59-
parse!(TreeType, io::IO; rule=:toplevel, version=VERSION)
63+
parse!(TreeType, io::IO; rule=:all, version=VERSION)
6064
6165
Parse Julia source code from a seekable `IO` object. The output is a tuple
6266
`(tree, diagnostics)`. When `parse!` returns, the stream `io` is positioned
6367
directly after the last byte which was consumed during parsing.
6468
"""
6569
function parse!(::Type{TreeType}, io::IO;
66-
rule::Symbol=:toplevel, version=VERSION, kws...) where {TreeType}
70+
rule::Symbol=:all, version=VERSION, kws...) where {TreeType}
6771
stream = ParseStream(io; version=version)
6872
parse!(stream; rule=rule)
6973
tree = build_tree(TreeType, stream; kws...)
@@ -75,7 +79,7 @@ function _parse(rule::Symbol, need_eof::Bool, ::Type{T}, text, index=1; version=
7579
ignore_trivia=true, filename=nothing, first_line=1, ignore_errors=false,
7680
ignore_warnings=ignore_errors) where {T}
7781
stream = ParseStream(text, index; version=version)
78-
if ignore_trivia && rule != :toplevel
82+
if ignore_trivia && rule != :all
7983
bump_trivia(stream, skip_newlines=true)
8084
empty!(stream)
8185
end
@@ -100,19 +104,22 @@ function _parse(rule::Symbol, need_eof::Bool, ::Type{T}, text, index=1; version=
100104
end
101105

102106
_parse_docs = """
103-
parse(TreeType, text, [index];
104-
version=VERSION,
105-
ignore_trivia=true,
106-
filename=nothing,
107-
ignore_errors=false,
108-
ignore_warnings=ignore_errors)
109-
110-
# Or, with the same arguments
107+
# Parse a single expression/statement
108+
parsestmt(TreeType, text, [index];
109+
version=VERSION,
110+
ignore_trivia=true,
111+
filename=nothing,
112+
ignore_errors=false,
113+
ignore_warnings=ignore_errors)
114+
115+
# Parse all statements at top level (file scope)
111116
parseall(...)
117+
118+
# Parse a single syntax atom
112119
parseatom(...)
113120
114121
Parse Julia source code string `text` into a data structure of type `TreeType`.
115-
`parse` parses a single Julia statement, `parseall` parses top level statements
122+
`parsestmt` parses a single Julia statement, `parseall` parses top level statements
116123
at file scope and `parseatom` parses a single Julia identifier or other "syntax
117124
atom".
118125
@@ -136,16 +143,17 @@ parsing. To avoid exceptions due to warnings, use `ignore_warnings=true`. To
136143
also avoid exceptions due to errors, use `ignore_errors=true`.
137144
"""
138145

139-
parse(::Type{T}, text::AbstractString; kws...) where {T} = _parse(:statement, true, T, text; kws...)[1]
140-
parseall(::Type{T}, text::AbstractString; kws...) where {T} = _parse(:toplevel, true, T, text; kws...)[1]
141-
parseatom(::Type{T}, text::AbstractString; kws...) where {T} = _parse(:atom, true, T, text; kws...)[1]
146+
"$_parse_docs"
147+
parsestmt(::Type{T}, text::AbstractString; kws...) where {T} = _parse(:statement, true, T, text; kws...)[1]
142148

143-
@eval @doc $_parse_docs parse
144-
@eval @doc $_parse_docs parseall
145-
@eval @doc $_parse_docs parseatom
149+
"$_parse_docs"
150+
parseall(::Type{T}, text::AbstractString; kws...) where {T} = _parse(:all, true, T, text; kws...)[1]
146151

147-
parse(::Type{T}, text::AbstractString, index::Integer; kws...) where {T} = _parse(:statement, false, T, text, index; kws...)
148-
parseall(::Type{T}, text::AbstractString, index::Integer; kws...) where {T} = _parse(:toplevel, false, T, text, index; kws...)
152+
"$_parse_docs"
153+
parseatom(::Type{T}, text::AbstractString; kws...) where {T} = _parse(:atom, true, T, text; kws...)[1]
154+
155+
parsestmt(::Type{T}, text::AbstractString, index::Integer; kws...) where {T} = _parse(:statement, false, T, text, index; kws...)
156+
parseall(::Type{T}, text::AbstractString, index::Integer; kws...) where {T} = _parse(:all, false, T, text, index; kws...)
149157
parseatom(::Type{T}, text::AbstractString, index::Integer; kws...) where {T} = _parse(:atom, false, T, text, index; kws...)
150158

151159
#-------------------------------------------------------------------------------
@@ -178,7 +186,7 @@ This interface works on UTF-8 encoded string or buffer data only.
178186
"""
179187
function tokenize(text)
180188
ps = ParseStream(text)
181-
parse!(ps, rule=:toplevel)
189+
parse!(ps, rule=:all)
182190
ts = ps.tokens
183191
output_tokens = Token[]
184192
for i = 2:length(ts)
@@ -198,3 +206,5 @@ end
198206
function untokenize(token::Token, text::Vector{UInt8})
199207
text[token.range]
200208
end
209+
210+
@deprecate parse parsestmt

src/source_files.jl

Lines changed: 4 additions & 4 deletions
Original file line numberDiff line numberDiff line change
@@ -36,18 +36,18 @@ function SourceFile(; filename, kwargs...)
3636
end
3737

3838
# Get the index (into `source.line_starts`) of the line containing the given byte within the code
39-
function source_line_index(source::SourceFile, byte_index)
39+
function _source_line_index(source::SourceFile, byte_index)
4040
lineidx = searchsortedlast(source.line_starts, byte_index)
4141
return (lineidx < lastindex(source.line_starts)) ? lineidx : lineidx-1
4242
end
4343
_source_line(source::SourceFile, lineidx) = lineidx + source.first_line - 1
44-
source_line(source::SourceFile, byte_index) = _source_line(source, source_line_index(source, byte_index))
44+
source_line(source::SourceFile, byte_index) = _source_line(source, _source_line_index(source, byte_index))
4545

4646
"""
4747
Get line number and character within the line at the given byte index.
4848
"""
4949
function source_location(source::SourceFile, byte_index)
50-
lineidx = source_line_index(source, byte_index)
50+
lineidx = _source_line_index(source, byte_index)
5151
i = source.line_starts[lineidx]
5252
column = 1
5353
while i < byte_index
@@ -63,7 +63,7 @@ Get byte range of the source line at byte_index, buffered by
6363
"""
6464
function source_line_range(source::SourceFile, byte_index;
6565
context_lines_before=0, context_lines_after=0)
66-
lineidx = source_line_index(source, byte_index)
66+
lineidx = _source_line_index(source, byte_index)
6767
fbyte = source.line_starts[max(lineidx-context_lines_before, 1)]
6868
lbyte = source.line_starts[min(lineidx+1+context_lines_after, end)] - 1
6969
fbyte,lbyte

test/benchmark.jl

Lines changed: 2 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -15,7 +15,7 @@ end
1515

1616
all_base_code = concat_base()
1717

18-
b_ParseStream = @benchmark JuliaSyntax.parse!(JuliaSyntax.ParseStream(all_base_code), rule=:toplevel)
18+
b_ParseStream = @benchmark JuliaSyntax.parse!(JuliaSyntax.ParseStream(all_base_code), rule=:all)
1919
b_GreenNode = @benchmark JuliaSyntax.parseall(JuliaSyntax.GreenNode, all_base_code)
2020
b_SyntaxNode = @benchmark JuliaSyntax.parseall(JuliaSyntax.SyntaxNode, all_base_code)
2121
b_Expr = @benchmark JuliaSyntax.parseall(Expr, all_base_code)
@@ -30,5 +30,5 @@ b_Expr = @benchmark JuliaSyntax.parseall(Expr, all_base_code)
3030
# Allocs.clear()
3131
# stream = JuliaSyntax.ParseStream(text);
3232
# JuliaSyntax.peek(stream);
33-
# Allocs.@profile sample_rate=1 JuliaSyntax.parse(stream)
33+
# Allocs.@profile sample_rate=1 JuliaSyntax.parse!(stream)
3434
# PProf.Allocs.pprof()

0 commit comments

Comments
 (0)