Skip to content

Commit 3dbcd32

Browse files
authored
Cleanup/document source code access functions + add filename() (#470)
* Move all source code access functions which refer to source locations and strings into the top of source_files.jl, and add some documentation for these. * Add `filename()` function to determine source file name of a syntax object * Also add a minor generalization to SyntaxNode->Expr conversion code to make Expr conversion general enough to allow it to also be used for JuliaLowering.SyntaxTree. (internal/experimental interface, for now)
1 parent a41f5e1 commit 3dbcd32

File tree

9 files changed

+182
-89
lines changed

9 files changed

+182
-89
lines changed

docs/src/api.md

Lines changed: 28 additions & 9 deletions
Original file line numberDiff line numberDiff line change
@@ -30,14 +30,36 @@ JuliaSyntax.untokenize
3030
JuliaSyntax.Token
3131
```
3232

33-
## Source file handling
33+
## Source code handling
34+
35+
This section describes the generic functions for source text, source location
36+
computation and formatting functions.
37+
38+
Contiguous syntax objects like nodes in the syntax tree should implement the
39+
following where possible:
3440

3541
```@docs
36-
JuliaSyntax.SourceFile
37-
JuliaSyntax.highlight
38-
JuliaSyntax.sourcetext
42+
JuliaSyntax.sourcefile
43+
JuliaSyntax.byte_range
44+
```
45+
46+
This will provide implementations of the following which include range
47+
information, line numbers, and fancy highlighting of source ranges:
48+
49+
```@docs
50+
JuliaSyntax.first_byte
51+
JuliaSyntax.last_byte
52+
JuliaSyntax.filename
3953
JuliaSyntax.source_line
4054
JuliaSyntax.source_location
55+
JuliaSyntax.sourcetext
56+
JuliaSyntax.highlight
57+
```
58+
59+
`SourceFile`-specific functions:
60+
61+
```@docs
62+
JuliaSyntax.SourceFile
4163
JuliaSyntax.source_line_range
4264
```
4365

@@ -64,8 +86,5 @@ JuliaSyntax.GreenNode
6486
```
6587

6688
Functions applicable to syntax trees include everything in the sections on
67-
heads/kinds, and source file handling.
68-
69-
```@docs
70-
JuliaSyntax.byte_range
71-
```
89+
heads/kinds as well as the accessor functions in the source code handling
90+
section.

src/diagnostics.jl

Lines changed: 6 additions & 7 deletions
Original file line numberDiff line numberDiff line change
@@ -37,8 +37,7 @@ function Diagnostic(first_byte, last_byte; error=nothing, warning=nothing)
3737
Diagnostic(first_byte, last_byte, level, message)
3838
end
3939

40-
first_byte(d::Diagnostic) = d.first_byte
41-
last_byte(d::Diagnostic) = d.last_byte
40+
byte_range(d::Diagnostic) = d.first_byte:d.last_byte
4241
is_error(d::Diagnostic) = d.level === :error
4342

4443
# Make relative path into a file URL
@@ -72,12 +71,12 @@ function show_diagnostic(io::IO, diagnostic::Diagnostic, source::SourceFile)
7271
(:normal, "Info")
7372
line, col = source_location(source, first_byte(diagnostic))
7473
linecol = "$line:$col"
75-
filename = source.filename
74+
fname = filename(source)
7675
file_href = nothing
77-
if !isnothing(filename)
78-
locstr = "$filename:$linecol"
79-
if !startswith(filename, "REPL[") && get(io, :color, false)
80-
url = _file_url(filename)
76+
if !isempty(fname)
77+
locstr = "$fname:$linecol"
78+
if !startswith(fname, "REPL[") && get(io, :color, false)
79+
url = _file_url(fname)
8180
if !isnothing(url)
8281
file_href = url*"#$linecol"
8382
end

src/expr.jl

Lines changed: 13 additions & 11 deletions
Original file line numberDiff line numberDiff line change
@@ -68,6 +68,11 @@ function _strip_parens(ex)
6868
end
6969
end
7070

71+
# Get Julia value of leaf node as it would be represented in `Expr` form
72+
function _expr_leaf_val(node::SyntaxNode)
73+
node.val
74+
end
75+
7176
function _leaf_to_Expr(source, txtbuf, head, srcrange, node)
7277
k = kind(head)
7378
if k == K"core_@cmd"
@@ -79,7 +84,7 @@ function _leaf_to_Expr(source, txtbuf, head, srcrange, node)
7984
Expr(:error) :
8085
Expr(:error, "$(_token_error_descriptions[k]): `$(source[srcrange])`")
8186
else
82-
val = isnothing(node) ? parse_julia_literal(txtbuf, head, srcrange) : node.val
87+
val = isnothing(node) ? parse_julia_literal(txtbuf, head, srcrange) : _expr_leaf_val(node)
8388
if val isa Union{Int128,UInt128,BigInt}
8489
# Ignore the values of large integers and convert them back to
8590
# symbolic/textural form for compatibility with the Expr
@@ -519,14 +524,7 @@ function build_tree(::Type{Expr}, stream::ParseStream;
519524
only(_fixup_Expr_children!(SyntaxHead(K"None",EMPTY_FLAGS), loc, Any[entry.ex]))
520525
end
521526

522-
"""
523-
Get the source file for a given syntax object
524-
"""
525-
function sourcefile(node::SyntaxNode)
526-
node.source
527-
end
528-
529-
function _to_expr(node::SyntaxNode)
527+
function _to_expr(node)
530528
file = sourcefile(node)
531529
if !haschildren(node)
532530
offset, txtbuf = _unsafe_wrap_substring(sourcetext(file))
@@ -537,9 +535,13 @@ function _to_expr(node::SyntaxNode)
537535
_internal_node_to_Expr(file, byte_range(node), head(node), byte_range.(cs), head.(cs), args)
538536
end
539537

540-
function Base.Expr(node::SyntaxNode)
538+
function to_expr(node)
541539
ex = _to_expr(node)
542-
loc = source_location(LineNumberNode, sourcefile(node), first_byte(node))
540+
loc = source_location(LineNumberNode, node)
543541
only(_fixup_Expr_children!(SyntaxHead(K"None",EMPTY_FLAGS), loc, Any[ex]))
544542
end
545543

544+
function Base.Expr(node::SyntaxNode)
545+
to_expr(node)
546+
end
547+

src/parse_stream.jl

Lines changed: 1 addition & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -514,8 +514,7 @@ struct FullToken
514514
end
515515

516516
head(t::FullToken) = t.head
517-
first_byte(t::FullToken) = t.first_byte
518-
last_byte(t::FullToken) = t.last_byte
517+
byte_range(t::FullToken) = t.first_byte:t.last_byte
519518
span(t::FullToken) = 1 + last_byte(t) - first_byte(t)
520519

521520
function peek_full_token(stream::ParseStream, n::Integer=1;

src/parser_api.jl

Lines changed: 2 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -26,6 +26,8 @@ function Base.showerror(io::IO, err::ParseError)
2626
show_diagnostics(io, err.diagnostics[1:i], err.source)
2727
end
2828

29+
sourcefile(err::ParseError) = err.source
30+
2931
"""
3032
parse!(stream::ParseStream; rule=:all)
3133

src/source_files.jl

Lines changed: 124 additions & 35 deletions
Original file line numberDiff line numberDiff line change
@@ -1,3 +1,110 @@
1+
#-------------------------------------------------------------------------------
2+
# Generic functions for source text, source location computation and formatting
3+
# functions
4+
5+
"""
6+
sourcefile(x)
7+
8+
Get the source file object (usually `SourceFile`) for a given syntax object
9+
`x`. The source file along with a byte range may be used to compute
10+
`source_line()`, `source_location()`, `filename()`, etc.
11+
"""
12+
function sourcefile
13+
end
14+
15+
"""
16+
byte_range(x)
17+
18+
Return the range of bytes which `x` covers in the source text.
19+
"""
20+
function byte_range
21+
end
22+
23+
"""
24+
first_byte(x)
25+
26+
Return the first byte of `x` in the source text.
27+
"""
28+
first_byte(x) = first(byte_range(x))
29+
30+
"""
31+
last_byte(x)
32+
33+
Return the last byte of `x` in the source text.
34+
"""
35+
last_byte(x) = last(byte_range(x))
36+
37+
"""
38+
filename(x)
39+
40+
Get the file name associated with `x`, or an empty string if one doesn't exist.
41+
42+
For objects `x` such as syntax trees, defers to `filename(sourcefile(x))` by
43+
default.
44+
"""
45+
function filename(x)
46+
source = sourcefile(x)
47+
isnothing(source) ? "" : filename(source)
48+
end
49+
50+
"""
51+
source_line(x)
52+
source_line(source::SourceFile, byte_index::Integer)
53+
54+
Get the line number of the first line on which object `x` appears. In the
55+
second form, get the line number at the given `byte_index` within `source`.
56+
"""
57+
source_line(x) = source_line(sourcefile(x), first_byte(x))
58+
59+
"""
60+
source_location(x)
61+
source_location(source::SourceFile, byte_index::Integer)
62+
63+
source_location(LineNumberNode, x)
64+
source_location(LineNumberNode, source, byte_index)
65+
66+
Get `(line,column)` of the first byte where object `x` appears in the source.
67+
The second form allows one to be more precise with the `byte_index`, given the
68+
source file.
69+
70+
Providing `LineNumberNode` as the first argument will return the line and file
71+
name in a line number node object.
72+
"""
73+
source_location(x) = source_location(sourcefile(x), first_byte(x))
74+
75+
"""
76+
sourcetext(x)
77+
78+
Get the full source text of syntax object `x`
79+
"""
80+
function sourcetext(x)
81+
view(sourcefile(x), byte_range(x))
82+
end
83+
84+
"""
85+
highlight(io, x; color, note, notecolor,
86+
context_lines_before, context_lines_inner, context_lines_after)
87+
88+
highlight(io::IO, source::SourceFile, range::UnitRange; kws...)
89+
90+
Print the lines of source code surrounding `x` which is highlighted with
91+
background `color` and underlined with markers in the text. A `note` in
92+
`notecolor` may be provided as annotation. By default, `x` should be an object
93+
with `sourcefile(x)` and `byte_range(x)` implemented.
94+
95+
The context arguments `context_lines_before`, etc, refer to the number of
96+
lines of code which will be printed as context before and after, with `inner`
97+
referring to context lines inside a multiline region.
98+
99+
The second form shares the keywords of the first but allows an explicit source
100+
file and byte range to be supplied.
101+
"""
102+
function highlight(io::IO, x; kws...)
103+
highlight(io, sourcefile(x), byte_range(x); kws...)
104+
end
105+
106+
107+
#-------------------------------------------------------------------------------
1108
"""
2109
SourceFile(code [; filename=nothing, first_line=1, first_index=1])
3110
@@ -53,16 +160,19 @@ function _source_line_index(source::SourceFile, byte_index)
53160
end
54161
_source_line(source::SourceFile, lineidx) = lineidx + source.first_line - 1
55162

56-
"""
57-
Get the line number at the given byte index.
58-
"""
59-
source_line(source::SourceFile, byte_index) =
163+
function source_location(::Type{LineNumberNode}, x)
164+
source_location(LineNumberNode, sourcefile(x), first_byte(x))
165+
end
166+
167+
source_line(source::SourceFile, byte_index::Integer) =
60168
_source_line(source, _source_line_index(source, byte_index))
61169

62-
"""
63-
Get line number and character within the line at the given byte index.
64-
"""
65-
function source_location(source::SourceFile, byte_index)
170+
function filename(source::SourceFile)
171+
f = source.filename
172+
!isnothing(f) ? f : ""
173+
end
174+
175+
function source_location(source::SourceFile, byte_index::Integer)
66176
lineidx = _source_line_index(source, byte_index)
67177
i = source.line_starts[lineidx]
68178
column = 1
@@ -77,7 +187,7 @@ end
77187
Get byte range of the source line at byte_index, buffered by
78188
`context_lines_before` and `context_lines_after` before and after.
79189
"""
80-
function source_line_range(source::SourceFile, byte_index;
190+
function source_line_range(source::SourceFile, byte_index::Integer;
81191
context_lines_before=0, context_lines_after=0)
82192
lineidx = _source_line_index(source, byte_index)
83193
fbyte = source.line_starts[max(lineidx-context_lines_before, 1)]
@@ -86,14 +196,14 @@ function source_line_range(source::SourceFile, byte_index;
86196
lbyte + source.byte_offset)
87197
end
88198

89-
function source_location(::Type{LineNumberNode}, source::SourceFile, byte_index)
90-
LineNumberNode(source_line(source, byte_index),
91-
isnothing(source.filename) ? nothing : Symbol(source.filename))
199+
function source_location(::Type{LineNumberNode}, source::SourceFile, byte_index::Integer)
200+
fn = filename(source)
201+
LineNumberNode(source_line(source, byte_index), isempty(fn) ? nothing : Symbol(fn))
92202
end
93203

94204
function Base.show(io::IO, ::MIME"text/plain", source::SourceFile)
95-
fn = isnothing(source.filename) ? "" : " $(source.filename)"
96-
header = "## SourceFile$fn ##"
205+
fn = filename(source)
206+
header = "## SourceFile$(isempty(fn) ? "" : " ")$fn ##"
97207
print(io, header, "\n")
98208
heightlim = displaysize(io)[1] ÷ 2
99209
if !get(io, :limit, false) || length(source.line_starts) <= heightlim
@@ -193,27 +303,6 @@ function _print_marker_line(io, prefix_str, str, underline, singleline, color,
193303
end
194304
end
195305

196-
function highlight(io::IO, x; kws...)
197-
highlight(io, sourcefile(x), byte_range(x); kws...)
198-
end
199-
200-
"""
201-
highlight(io::IO, source::SourceFile, range::UnitRange;
202-
color, note, notecolor,
203-
context_lines_before, context_lines_inner, context_lines_after,
204-
highlight(io, x; kws...)
205-
206-
Print the lines of source code `source` surrounding the given byte `range`
207-
which is highlighted with background `color` and underlined with markers in the
208-
text. A `note` in `notecolor` may be provided as annotation.
209-
210-
In the second form, `x` is an object with `sourcefile(x)` and `byte_range(x)`
211-
implemented.
212-
213-
The context arguments `context_lines_before`, etc, refer to the number of
214-
lines of code which will be printed as context before and after, with `inner`
215-
referring to context lines inside a multiline region.
216-
"""
217306
function highlight(io::IO, source::SourceFile, range::UnitRange;
218307
color=(120,70,70), context_lines_before=2,
219308
context_lines_inner=1, context_lines_after=2,

src/syntax_tree.jl

Lines changed: 4 additions & 22 deletions
Original file line numberDiff line numberDiff line change
@@ -121,31 +121,13 @@ head(node::AbstractSyntaxNode) = head(node.raw)
121121

122122
span(node::AbstractSyntaxNode) = span(node.raw)
123123

124-
first_byte(node::AbstractSyntaxNode) = node.position
125-
last_byte(node::AbstractSyntaxNode) = node.position + span(node) - 1
124+
byte_range(node::AbstractSyntaxNode) = node.position:(node.position + span(node) - 1)
126125

127-
"""
128-
byte_range(ex)
129-
130-
Return the range of bytes which `ex` covers in the source text.
131-
"""
132-
byte_range(ex) = first_byte(ex):last_byte(ex)
133-
134-
"""
135-
sourcetext(node)
136-
137-
Get the full source text of a node.
138-
"""
139-
function sourcetext(node::AbstractSyntaxNode)
140-
view(sourcefile(node), byte_range(node))
141-
end
142-
143-
source_line(node::AbstractSyntaxNode) = source_line(sourcefile(node), node.position)
144-
source_location(node::AbstractSyntaxNode) = source_location(sourcefile(node), node.position)
126+
sourcefile(node::AbstractSyntaxNode) = node.source
145127

146128
function _show_syntax_node(io, current_filename, node::AbstractSyntaxNode,
147129
indent, show_byte_offsets)
148-
fname = sourcefile(node).filename
130+
fname = filename(node)
149131
line, col = source_location(node)
150132
posstr = "$(lpad(line, 4)):$(rpad(col,3))"
151133
if show_byte_offsets
@@ -192,7 +174,7 @@ end
192174

193175
function Base.show(io::IO, ::MIME"text/plain", node::AbstractSyntaxNode; show_byte_offsets=false)
194176
println(io, "line:col│$(show_byte_offsets ? " byte_range │" : "") tree │ file_name")
195-
_show_syntax_node(io, Ref{Union{Nothing,String}}(nothing), node, "", show_byte_offsets)
177+
_show_syntax_node(io, Ref(""), node, "", show_byte_offsets)
196178
end
197179

198180
function Base.show(io::IO, ::MIME"text/x.sexpression", node::AbstractSyntaxNode)

0 commit comments

Comments
 (0)