Skip to content

Commit c076e10

Browse files
authored
Merge pull request #311 from JuliaLang/c42f/fix-SourceFile-offsets
Fixes for `SourceFile` byte offsets
2 parents ec51994 + 325a850 commit c076e10

File tree

11 files changed

+88
-46
lines changed

11 files changed

+88
-46
lines changed

src/diagnostics.jl

Lines changed: 0 additions & 5 deletions
Original file line numberDiff line numberDiff line change
@@ -91,11 +91,6 @@ function show_diagnostics(io::IO, diagnostics::AbstractVector{Diagnostic}, text:
9191
show_diagnostics(io, diagnostics, SourceFile(text))
9292
end
9393

94-
function emit_diagnostic(diagnostics::AbstractVector{Diagnostic},
95-
byterange::AbstractUnitRange; kws...)
96-
push!(diagnostics, Diagnostic(first(byterange), last(byterange); kws...))
97-
end
98-
9994
function any_error(diagnostics::AbstractVector{Diagnostic})
10095
any(is_error(d) for d in diagnostics)
10196
end

src/expr.jl

Lines changed: 2 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -457,9 +457,8 @@ end
457457

458458
function build_tree(::Type{Expr}, stream::ParseStream;
459459
filename=nothing, first_line=1, kws...)
460-
source = SourceFile(sourcetext(stream), first_index=first_byte(stream),
461-
filename=filename, first_line=first_line)
462-
txtbuf = textbuf(stream)
460+
source = SourceFile(stream, filename=filename, first_line=first_line)
461+
txtbuf = unsafe_textbuf(stream)
463462
args = Any[]
464463
childranges = UnitRange{Int}[]
465464
childheads = SyntaxHead[]

src/parse_stream.jl

Lines changed: 15 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -901,11 +901,16 @@ function emit_diagnostic(stream::ParseStream, mark::ParseStreamPosition,
901901
emit_diagnostic(stream, fbyte:lbyte; kws...)
902902
end
903903

904+
function emit_diagnostic(diagnostics::AbstractVector{Diagnostic},
905+
byterange::AbstractUnitRange; kws...)
906+
push!(diagnostics, Diagnostic(first(byterange), last(byterange); kws...))
907+
end
908+
904909
#-------------------------------------------------------------------------------
905910
# ParseStream Post-processing
906911

907912
function validate_tokens(stream::ParseStream)
908-
txtbuf = textbuf(stream)
913+
txtbuf = unsafe_textbuf(stream)
909914
toks = stream.tokens
910915
charbuf = IOBuffer()
911916
for i = 2:length(toks)
@@ -1103,12 +1108,19 @@ function sourcetext(stream::ParseStream; steal_textbuf=false)
11031108
SubString(str, first_byte(stream), thisind(str, last_byte(stream)))
11041109
end
11051110

1111+
function SourceFile(stream::ParseStream; kws...)
1112+
return SourceFile(sourcetext(stream); first_index=first_byte(stream), kws...)
1113+
end
1114+
11061115
"""
1107-
textbuf(stream)
1116+
unsafe_textbuf(stream)
11081117
11091118
Return the `Vector{UInt8}` text buffer being parsed by this `ParseStream`.
1119+
1120+
!!! warning
1121+
The caller must hold a reference to `stream` while using the returned text buffer.
11101122
"""
1111-
textbuf(stream) = stream.textbuf
1123+
unsafe_textbuf(stream) = stream.textbuf
11121124

11131125
first_byte(stream::ParseStream) = first(stream.tokens).next_byte # Use sentinel token
11141126
last_byte(stream::ParseStream) = _next_byte(stream)-1

src/parser.jl

Lines changed: 12 additions & 12 deletions
Original file line numberDiff line numberDiff line change
@@ -125,8 +125,8 @@ function emit_diagnostic(ps::ParseState, args...; kws...)
125125
emit_diagnostic(ps.stream, args...; kws...)
126126
end
127127

128-
function textbuf(ps::ParseState)
129-
textbuf(ps.stream)
128+
function unsafe_textbuf(ps::ParseState)
129+
unsafe_textbuf(ps.stream)
130130
end
131131

132132
function first_child_position(ps::ParseState, pos::ParseStreamPosition)
@@ -3143,7 +3143,7 @@ function parse_brackets(after_parse::Function,
31433143
return opts
31443144
end
31453145

3146-
is_indentation(b::UInt8) = (b == UInt8(' ') || b == UInt8('\t'))
3146+
_is_indentation(b::UInt8) = (b == u8" " || b == u8"\t")
31473147

31483148
# Parse a string, embedded interpolations and deindent triple quoted strings
31493149
# by marking indentation characters as whitespace trivia.
@@ -3157,7 +3157,7 @@ function parse_string(ps::ParseState, raw::Bool)
31573157
indent_ref_i = 0
31583158
indent_ref_len = typemax(Int)
31593159
indent_chunks = acquire_positions(ps.stream)
3160-
buf = textbuf(ps)
3160+
txtbuf = unsafe_textbuf(ps)
31613161
chunk_flags = raw ? RAW_STRING_FLAG : EMPTY_FLAGS
31623162
bump(ps, TRIVIA_FLAG)
31633163
first_chunk = true
@@ -3212,10 +3212,10 @@ function parse_string(ps::ParseState, raw::Bool)
32123212
if triplestr && first_chunk && span(t) <= 2 &&
32133213
begin
32143214
s = span(t)
3215-
b = buf[last_byte(t)]
3215+
b = txtbuf[last_byte(t)]
32163216
# Test whether the string is a single logical newline
3217-
(s == 1 && (b == UInt8('\n') || b == UInt8('\r'))) ||
3218-
(s == 2 && (buf[first_byte(t)] == UInt8('\r') && b == UInt8('\n')))
3217+
(s == 1 && (b == u8"\n" || b == u8"\r")) ||
3218+
(s == 2 && (txtbuf[first_byte(t)] == u8"\r" && b == u8"\n"))
32193219
end
32203220
# First line of triple string is a newline only: mark as trivia.
32213221
# """\nx""" ==> (string-s "x")
@@ -3253,8 +3253,8 @@ function parse_string(ps::ParseState, raw::Bool)
32533253
# """\n $a \n $b""" ==> (string-s a " \n" b)
32543254
# """\n $a\n $b\n""" ==> (string-s " " a "\n" " " b "\n")
32553255
#
3256-
if prev_chunk_newline && (b = buf[first_byte(t)];
3257-
b != UInt8('\n') && b != UInt8('\r'))
3256+
if prev_chunk_newline && (b = txtbuf[first_byte(t)];
3257+
b != u8"\n" && b != u8"\r")
32583258
# Compute length of longest common prefix of mixed
32593259
# spaces and tabs, in bytes
32603260
#
@@ -3267,7 +3267,7 @@ function parse_string(ps::ParseState, raw::Bool)
32673267
# No indentation found yet. Find indentation we'll
32683268
# use as a reference
32693269
i = first_byte(t) - 1
3270-
while i < last_byte(t) && is_indentation(buf[i+1])
3270+
while i < last_byte(t) && _is_indentation(txtbuf[i+1])
32713271
i += 1
32723272
end
32733273
indent_ref_i = first_byte(t)
@@ -3277,7 +3277,7 @@ function parse_string(ps::ParseState, raw::Bool)
32773277
# shortening length if necessary.
32783278
j = 0
32793279
while j < span(t) && j < indent_ref_len
3280-
if buf[j + first_byte(t)] != buf[j + indent_ref_i]
3280+
if txtbuf[j + first_byte(t)] != txtbuf[j + indent_ref_i]
32813281
break
32823282
end
32833283
j += 1
@@ -3287,7 +3287,7 @@ function parse_string(ps::ParseState, raw::Bool)
32873287
# Prepare a place for indentation trivia, if necessary
32883288
push!(indent_chunks, bump_invisible(ps, K"TOMBSTONE"))
32893289
end
3290-
b = buf[last_byte(t)]
3290+
b = txtbuf[last_byte(t)]
32913291
prev_chunk_newline = b == UInt8('\n') || b == UInt8('\r')
32923292
end
32933293
bump(ps, chunk_flags)

src/parser_api.jl

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -11,7 +11,7 @@ struct ParseError <: Exception
1111
end
1212

1313
function ParseError(stream::ParseStream; incomplete_tag=:none, kws...)
14-
source = SourceFile(sourcetext(stream); kws...)
14+
source = SourceFile(stream; kws...)
1515
ParseError(source, stream.diagnostics, incomplete_tag)
1616
end
1717

src/source_files.jl

Lines changed: 3 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -76,7 +76,8 @@ function source_line_range(source::SourceFile, byte_index;
7676
lineidx = _source_line_index(source, byte_index)
7777
fbyte = source.line_starts[max(lineidx-context_lines_before, 1)]
7878
lbyte = source.line_starts[min(lineidx+1+context_lines_after, end)] - 1
79-
fbyte,lbyte
79+
return (fbyte + source.byte_offset,
80+
lbyte + source.byte_offset)
8081
end
8182

8283
function source_location(::Type{LineNumberNode}, source::SourceFile, byte_index)
@@ -120,7 +121,7 @@ function Base.getindex(source::SourceFile, i::Int)
120121
end
121122

122123
function Base.thisind(source::SourceFile, i::Int)
123-
thisind(source.code, i - source.byte_offset)
124+
thisind(source.code, i - source.byte_offset) + source.byte_offset
124125
end
125126

126127
Base.firstindex(source::SourceFile) = firstindex(source.code) + source.byte_offset

src/syntax_tree.jl

Lines changed: 7 additions & 4 deletions
Original file line numberDiff line numberDiff line change
@@ -61,8 +61,11 @@ Base.show(io::IO, ::ErrorVal) = printstyled(io, "✘", color=:light_red)
6161

6262
function SyntaxNode(source::SourceFile, raw::GreenNode{SyntaxHead};
6363
keep_parens=false, position::Integer=1)
64-
offset, txtbuf = _unsafe_wrap_substring(sourcetext(source))
65-
_to_SyntaxNode(source, txtbuf, offset, raw, convert(Int, position), keep_parens)
64+
GC.@preserve source begin
65+
raw_offset, txtbuf = _unsafe_wrap_substring(source.code)
66+
offset = raw_offset - source.byte_offset
67+
_to_SyntaxNode(source, txtbuf, offset, raw, convert(Int, position), keep_parens)
68+
end
6669
end
6770

6871
function _to_SyntaxNode(source::SourceFile, txtbuf::Vector{UInt8}, offset::Int,
@@ -222,8 +225,8 @@ Base.copy(data::SyntaxData) = SyntaxData(data.source, data.raw, data.position, d
222225
function build_tree(::Type{SyntaxNode}, stream::ParseStream;
223226
filename=nothing, first_line=1, keep_parens=false, kws...)
224227
green_tree = build_tree(GreenNode, stream; kws...)
225-
source = SourceFile(sourcetext(stream), filename=filename, first_line=first_line)
226-
SyntaxNode(source, green_tree, position=1, keep_parens=keep_parens)
228+
source = SourceFile(stream, filename=filename, first_line=first_line)
229+
SyntaxNode(source, green_tree, position=first_byte(stream), keep_parens=keep_parens)
227230
end
228231

229232
#-------------------------------------------------------------------------------

test/hooks.jl

Lines changed: 28 additions & 14 deletions
Original file line numberDiff line numberDiff line change
@@ -1,3 +1,15 @@
1+
function _unwrap_parse_error(core_hook_result)
2+
@test Meta.isexpr(core_hook_result[1], :error, 1)
3+
err = core_hook_result[1].args[1]
4+
if JuliaSyntax._has_v1_10_hooks
5+
@test err isa Meta.ParseError
6+
return err.detail
7+
else
8+
@test err isa JuliaSyntax.ParseError
9+
return err
10+
end
11+
end
12+
113
@testset "Hooks for Core integration" begin
214
@testset "whitespace parsing" begin
315
@test JuliaSyntax.core_parser_hook("", "somefile", 1, 0, :statement) == Core.svec(nothing, 0)
@@ -19,26 +31,28 @@
1931
@test ex.args[2] == LineNumberNode(2, "otherfile")
2032

2133
# Errors also propagate file & lineno
22-
err = JuliaSyntax.core_parser_hook("[x)", "f1", 1, 0, :statement)[1].args[1]
23-
if JuliaSyntax._has_v1_10_hooks
24-
@test err isa Meta.ParseError
25-
err = err.detail
26-
else
27-
@test err isa JuliaSyntax.ParseError
28-
end
34+
err = _unwrap_parse_error(
35+
JuliaSyntax.core_parser_hook("[x)", "f1", 1, 0, :statement)
36+
)
2937
@test err isa JuliaSyntax.ParseError
3038
@test err.source.filename == "f1"
3139
@test err.source.first_line == 1
32-
err = JuliaSyntax.core_parser_hook("[x)", "f2", 2, 0, :statement)[1].args[1]
33-
if JuliaSyntax._has_v1_10_hooks
34-
@test err isa Meta.ParseError
35-
err = err.detail
36-
else
37-
@test err isa JuliaSyntax.ParseError
38-
end
40+
err = _unwrap_parse_error(
41+
JuliaSyntax.core_parser_hook("[x)", "f2", 2, 0, :statement)
42+
)
3943
@test err isa JuliaSyntax.ParseError
4044
@test err.source.filename == "f2"
4145
@test err.source.first_line == 2
46+
47+
# Errors including nontrivial offset indices
48+
err = _unwrap_parse_error(
49+
JuliaSyntax.core_parser_hook("a\nh{x)\nb", "test.jl", 1, 2, :statement)
50+
)
51+
@test err isa JuliaSyntax.ParseError
52+
@test err.source.first_line == 1
53+
@test err.diagnostics[1].first_byte == 6
54+
@test err.diagnostics[1].last_byte == 5
55+
@test err.diagnostics[1].message == "Expected `}`"
4256
end
4357

4458
@testset "toplevel errors" begin

test/source_files.jl

Lines changed: 13 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -28,8 +28,14 @@
2828
end
2929

3030
# byte offset
31-
@test source_location(SourceFile("a\nbb\nccc\ndddd", first_index=10), 13) == (2,2)
32-
@test source_line(SourceFile("a\nbb\nccc\ndddd", first_index=10), 15) == 3
31+
sf = SourceFile("a\nbb\nccc\ndddd", first_index=10)
32+
@test source_location(sf, 13) == (2,2)
33+
@test source_line(sf, 15) == 3
34+
@test source_line_range(sf, 10) == (10,11)
35+
@test source_line_range(sf, 11) == (10,11)
36+
@test source_line_range(sf, 12) == (12,14)
37+
@test source_line_range(sf, 14) == (12,14)
38+
@test source_line_range(sf, 15) == (15,18)
3339

3440
# source_line convenience function
3541
@test source_line(SourceFile("a\nb\n"), 2) == 1
@@ -52,6 +58,11 @@ end
5258
@test sf[10:11] == "ab"
5359
@test view(sf, 10:11) == "ab"
5460

61+
@test thisind(SourceFile("xαx", first_index=10), 10) == 10
62+
@test thisind(SourceFile("xαx", first_index=10), 11) == 11
63+
@test thisind(SourceFile("xαx", first_index=10), 12) == 11
64+
@test thisind(SourceFile("xαx", first_index=10), 13) == 13
65+
5566
if Base.VERSION >= v"1.4"
5667
# Protect the `[begin` from being viewed by the parser on older Julia versions
5768
@test eval(Meta.parse("SourceFile(\"a\nb\n\")[begin:end]")) == "a\nb\n"

test/syntax_tree.jl

Lines changed: 6 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -49,6 +49,12 @@
4949
@test length(children(node)) == 2
5050
node[2] = parsestmt(SyntaxNode, "y")
5151
@test sourcetext(child(node, 2)) == "y"
52+
53+
# SyntaxNode with offsets
54+
t,_ = parsestmt(SyntaxNode, "begin a end\nbegin b end", 13)
55+
@test t.position == 13
56+
@test child(t,1).position == 19
57+
@test child(t,1).val == :b
5258
end
5359

5460
@testset "SyntaxNode pretty printing" begin

0 commit comments

Comments
 (0)