Skip to content

Commit 9c26a47

Browse files
committed
Clean up emit_diagnostic stream position handling
`emit_diagnostic(ps, mark, ...)` now behaves like `emit(ps, mark, ...)` in terms of the source range covered by `mark`. This is much less confusing than the previous behavior.
1 parent 2becf30 commit 9c26a47

File tree

7 files changed

+94
-61
lines changed

7 files changed

+94
-61
lines changed

src/diagnostics.jl

Lines changed: 2 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -92,8 +92,8 @@ function show_diagnostics(io::IO, diagnostics::AbstractVector{Diagnostic}, text:
9292
end
9393

9494
function emit_diagnostic(diagnostics::AbstractVector{Diagnostic},
95-
fbyte::Integer, lbyte::Integer; kws...)
96-
push!(diagnostics, Diagnostic(fbyte, lbyte; kws...))
95+
byterange::AbstractUnitRange; kws...)
96+
push!(diagnostics, Diagnostic(first(byterange), last(byterange); kws...))
9797
end
9898

9999
function any_error(diagnostics::AbstractVector{Diagnostic})

src/kinds.jl

Lines changed: 5 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -1133,6 +1133,10 @@ function is_radical_op(x)
11331133
kind(x) in (K"", K"", K"")
11341134
end
11351135

1136+
"""
1137+
Return true if `x` has whitespace or comment kind
1138+
"""
11361139
function is_whitespace(x)
1137-
kind(x) in (K"Whitespace", K"NewlineWs")
1140+
k = kind(x)
1141+
return k == K"Whitespace" || k == K"NewlineWs" || k == K"Comment"
11381142
end

src/literal_parsing.jl

Lines changed: 3 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -261,7 +261,7 @@ function unescape_julia_string(io::IO, str::AbstractString,
261261
u = m == 4 ? 'u' : 'U'
262262
msg = (m == 2) ? "invalid hex escape sequence" :
263263
"invalid unicode escape sequence"
264-
emit_diagnostic(diagnostics, escstart, i, error=msg)
264+
emit_diagnostic(diagnostics, escstart:i, error=msg)
265265
had_error = true
266266
else
267267
if m == 2 # \x escape sequence
@@ -279,7 +279,7 @@ function unescape_julia_string(io::IO, str::AbstractString,
279279
i += 1
280280
end
281281
if n > 255
282-
emit_diagnostic(diagnostics, escstart, i,
282+
emit_diagnostic(diagnostics, escstart:i,
283283
error="invalid octal escape sequence")
284284
had_error = true
285285
else
@@ -303,7 +303,7 @@ function unescape_julia_string(io::IO, str::AbstractString,
303303
c == '`' ? '`' :
304304
nothing
305305
if isnothing(u)
306-
emit_diagnostic(diagnostics, escstart, i,
306+
emit_diagnostic(diagnostics, escstart:i,
307307
error="invalid escape sequence")
308308
had_error = true
309309
else

src/parse_stream.jl

Lines changed: 27 additions & 23 deletions
Original file line numberDiff line numberDiff line change
@@ -359,7 +359,7 @@ function _buffer_lookahead_tokens(lexer, lookahead)
359359
while true
360360
raw = Tokenize.next_token(lexer)
361361
k = kind(raw)
362-
was_whitespace = k in (K"Whitespace", K"Comment", K"NewlineWs")
362+
was_whitespace = is_whitespace(k)
363363
had_whitespace |= was_whitespace
364364
f = EMPTY_FLAGS
365365
raw.dotop && (f |= DOTOP_FLAG)
@@ -622,7 +622,7 @@ function _bump_until_n(stream::ParseStream, n::Integer, flags, remap_kind=K"None
622622
break
623623
end
624624
f = flags | (@__MODULE__).flags(tok)
625-
is_trivia = k (K"Whitespace", K"Comment", K"NewlineWs")
625+
is_trivia = is_whitespace(k)
626626
is_trivia && (f |= TRIVIA_FLAG)
627627
outk = (is_trivia || remap_kind == K"None") ? k : remap_kind
628628
h = SyntaxHead(outk, f)
@@ -686,7 +686,7 @@ function bump_invisible(stream::ParseStream, kind, flags=EMPTY_FLAGS;
686686
h = SyntaxHead(kind, flags)
687687
push!(stream.tokens, SyntaxToken(h, (@__MODULE__).kind(h), false, b))
688688
if !isnothing(error)
689-
emit_diagnostic(stream, b, b-1, error=error)
689+
emit_diagnostic(stream, b:b-1, error=error)
690690
end
691691
stream.peek_count = 0
692692
return position(stream)
@@ -797,12 +797,6 @@ function Base.position(stream::ParseStream)
797797
ParseStreamPosition(lastindex(stream.tokens), lastindex(stream.ranges))
798798
end
799799

800-
# Get position of next item to be emitted into the output stream
801-
# TODO: Figure out how to remove this? It's only used with emit_diagnostic
802-
function next_position(stream::ParseStream)
803-
ParseStreamPosition(lastindex(stream.tokens)+1, lastindex(stream.ranges)+1)
804-
end
805-
806800
"""
807801
emit(stream, mark, kind, flags = EMPTY_FLAGS; error=nothing)
808802
@@ -819,14 +813,14 @@ function emit(stream::ParseStream, mark::ParseStreamPosition, kind::Kind,
819813
# nested.
820814
fbyte = token_first_byte(stream, first_token)
821815
lbyte = token_last_byte(stream, lastindex(stream.tokens))
822-
emit_diagnostic(stream, fbyte, lbyte, error=error)
816+
emit_diagnostic(stream, fbyte:lbyte, error=error)
823817
end
824818
push!(stream.ranges, range)
825819
return position(stream)
826820
end
827821

828-
function emit_diagnostic(stream::ParseStream, fbyte::Integer, lbyte::Integer; kws...)
829-
emit_diagnostic(stream.diagnostics, fbyte, lbyte; kws...)
822+
function emit_diagnostic(stream::ParseStream, byterange::AbstractUnitRange; kws...)
823+
emit_diagnostic(stream.diagnostics, byterange; kws...)
830824
return nothing
831825
end
832826

@@ -849,20 +843,30 @@ function emit_diagnostic(stream::ParseStream; whitespace=false, kws...)
849843
end
850844
fbyte = lookahead_token_first_byte(stream, begin_tok_i)
851845
lbyte = lookahead_token_last_byte(stream, end_tok_i)
852-
emit_diagnostic(stream, fbyte, lbyte; kws...)
846+
emit_diagnostic(stream, fbyte:lbyte; kws...)
853847
return nothing
854848
end
855849

856-
function emit_diagnostic(stream::ParseStream, mark::ParseStreamPosition; kws...)
857-
emit_diagnostic(stream, token_first_byte(stream, mark.token_index),
858-
_next_byte(stream) - 1; kws...)
850+
function emit_diagnostic(stream::ParseStream, mark::ParseStreamPosition; trim_whitespace=true, kws...)
851+
i = mark.token_index
852+
j = lastindex(stream.tokens)
853+
if trim_whitespace
854+
while i < j && is_whitespace(stream.tokens[j])
855+
j -= 1
856+
end
857+
while i+1 < j && is_whitespace(stream.tokens[i+1])
858+
i += 1
859+
end
860+
end
861+
byterange = stream.tokens[i].next_byte:stream.tokens[j].next_byte-1
862+
emit_diagnostic(stream, byterange; kws...)
859863
end
860864

861865
function emit_diagnostic(stream::ParseStream, mark::ParseStreamPosition,
862866
end_mark::ParseStreamPosition; kws...)
863-
fbyte = token_first_byte(stream, mark.token_index)
864-
lbyte = token_first_byte(stream, end_mark.token_index) - 1
865-
emit_diagnostic(stream, fbyte, lbyte; kws...)
867+
fbyte = stream.tokens[mark.token_index].next_byte
868+
lbyte = stream.tokens[end_mark.token_index].next_byte-1
869+
emit_diagnostic(stream, fbyte:lbyte; kws...)
866870
end
867871

868872
#-------------------------------------------------------------------------------
@@ -898,11 +902,11 @@ function validate_tokens(stream::ParseStream)
898902
if code === :ok
899903
# pass
900904
elseif code === :overflow
901-
emit_diagnostic(stream, fbyte, lbyte,
905+
emit_diagnostic(stream, fbyte:lbyte,
902906
error="overflow in floating point literal")
903907
error_kind = K"ErrorNumericOverflow"
904908
elseif underflow0
905-
emit_diagnostic(stream, fbyte, lbyte,
909+
emit_diagnostic(stream, fbyte:lbyte,
906910
warning="underflow to zero in floating point literal")
907911
end
908912
elseif k == K"Char"
@@ -917,7 +921,7 @@ function validate_tokens(stream::ParseStream)
917921
read(charbuf, Char)
918922
if !eof(charbuf)
919923
error_kind = K"ErrorOverLongCharacter"
920-
emit_diagnostic(stream, fbyte, lbyte,
924+
emit_diagnostic(stream, fbyte:lbyte,
921925
error="character literal contains multiple characters")
922926
end
923927
end
@@ -929,7 +933,7 @@ function validate_tokens(stream::ParseStream)
929933
end
930934
elseif is_error(k) && k != K"error"
931935
# Emit messages for non-generic token errors
932-
emit_diagnostic(stream, fbyte, lbyte,
936+
emit_diagnostic(stream, fbyte:lbyte,
933937
error=_token_error_descriptions[k])
934938
end
935939
if error_kind != K"None"

src/parser.jl

Lines changed: 23 additions & 29 deletions
Original file line numberDiff line numberDiff line change
@@ -113,10 +113,6 @@ function Base.position(ps::ParseState, args...)
113113
position(ps.stream, args...)
114114
end
115115

116-
function next_position(ps::ParseState, args...)
117-
next_position(ps.stream, args...)
118-
end
119-
120116
function emit(ps::ParseState, args...; kws...)
121117
emit(ps.stream, args...; kws...)
122118
end
@@ -1230,11 +1226,10 @@ function parse_unary(ps::ParseState)
12301226
space_before_paren = preceding_whitespace(t2)
12311227
if space_before_paren
12321228
# Setup possible whitespace error between operator and (
1233-
ws_node_mark = position(ps)
1234-
ws_mark = next_position(ps)
1229+
ws_mark = position(ps)
12351230
bump_trivia(ps)
1236-
ws_error_pos = emit(ps, ws_node_mark, K"TOMBSTONE")
1237-
ws_mark_end = next_position(ps)
1231+
ws_error_pos = emit(ps, ws_mark, K"TOMBSTONE")
1232+
ws_mark_end = position(ps)
12381233
end
12391234

12401235
mark_before_paren = position(ps)
@@ -1617,7 +1612,8 @@ function parse_call_chain(ps::ParseState, mark, is_macrocall=false)
16171612
k = peek(ps)
16181613
if k == K"("
16191614
if is_macrocall
1620-
bump_invisible(ps, K"error")
1615+
# @M.(x) ==> (macrocall (dotcall @M (error-t) x))
1616+
bump_invisible(ps, K"error", TRIVIA_FLAG)
16211617
emit_diagnostic(ps, mark,
16221618
error="dot call syntax not supported for macros")
16231619
end
@@ -1662,7 +1658,7 @@ function parse_call_chain(ps::ParseState, mark, is_macrocall=false)
16621658
end
16631659
parse_macro_name(ps)
16641660
macro_name_position = position(ps)
1665-
macro_atname_range = (m, next_position(ps))
1661+
macro_atname_range = (m, position(ps))
16661662
emit(ps, m, K"quote")
16671663
emit(ps, mark, K".")
16681664
elseif k == K"'"
@@ -1857,7 +1853,6 @@ function parse_resword(ps::ParseState)
18571853
elseif word in KSet"global local"
18581854
# global x ==> (global x)
18591855
# local x ==> (local x)
1860-
# global x,y ==> (global x y)
18611856
bump(ps, TRIVIA_FLAG)
18621857
const_mark = nothing
18631858
if peek(ps) == K"const"
@@ -2080,16 +2075,17 @@ function parse_global_local_const_vars(ps)
20802075
mark = position(ps)
20812076
n_commas = parse_comma(ps, false)
20822077
t = peek_token(ps)
2083-
assign_prec = is_prec_assignment(t)
2084-
if n_commas >= 1 && assign_prec
2085-
# const x,y = 1,2 ==> (const (= (tuple x y) (tuple 1 2)))
2086-
emit(ps, mark, K"tuple")
2087-
end
2088-
if assign_prec
2078+
if is_prec_assignment(t)
2079+
if n_commas >= 1
2080+
# const x,y = 1,2 ==> (const (= (tuple x y) (tuple 1 2)))
2081+
emit(ps, mark, K"tuple")
2082+
end
20892083
# const x = 1 ==> (const (= x 1))
20902084
# global x ~ 1 ==> (global (call-i x ~ 1))
20912085
# global x += 1 ==> (global (+= x 1))
20922086
parse_assignment_with_initial_ex(ps, mark, parse_comma)
2087+
else
2088+
# global x,y ==> (global x y)
20932089
end
20942090
return kind(t) == K"=" && !is_dotted(t)
20952091
end
@@ -2106,7 +2102,7 @@ function parse_function_signature(ps::ParseState, is_function::Bool)
21062102
kb = peek_behind(ps).orig_kind
21072103
if is_initial_reserved_word(ps, kb)
21082104
# macro while(ex) end ==> (macro (call (error while) ex) (block))
2109-
emit(ps, mark, K"error", error="Invalid macro name")
2105+
emit(ps, mark, K"error", error="invalid macro name")
21102106
else
21112107
# macro f() end ==> (macro (call f) (block))
21122108
# macro (:)(ex) end ==> (macro (call (parens :) ex) (block))
@@ -2228,7 +2224,6 @@ function parse_try(ps)
22282224
out_kind = K"try"
22292225
mark = position(ps)
22302226
bump(ps, TRIVIA_FLAG)
2231-
diagnostic_mark = position(ps)
22322227
parse_block(ps)
22332228
has_catch = false
22342229
has_else = false
@@ -2278,8 +2273,8 @@ function parse_try(ps)
22782273
out_kind = K"try_finally_catch"
22792274
m = position(ps)
22802275
parse_catch(ps)
2281-
emit_diagnostic(ps, m, position(ps),
2282-
warning="`catch` after `finally` will execute out of order")
2276+
emit_diagnostic(ps, m,
2277+
warning="`catch` after `finally` will execute out of order")
22832278
end
22842279
missing_recovery = !has_catch && !has_finally
22852280
if missing_recovery
@@ -2289,7 +2284,7 @@ function parse_try(ps)
22892284
bump_closing_token(ps, K"end")
22902285
emit(ps, mark, out_kind, flags)
22912286
if missing_recovery
2292-
emit_diagnostic(ps, diagnostic_mark, error="try without catch or finally")
2287+
emit_diagnostic(ps, mark, error="try without catch or finally")
22932288
end
22942289
end
22952290

@@ -2359,11 +2354,9 @@ function fix_macro_name_kind!(ps::ParseState, macro_name_position, name_kind=not
23592354
if isnothing(name_kind)
23602355
name_kind = (k == K"Identifier") ? K"MacroName" : K"error"
23612356
if name_kind == K"error"
2362-
# Hack to handle bad but unusual syntax like `@A.$x a`
2363-
ri = macro_name_position.range_index
2364-
startpos = ParseStreamPosition(ps.stream.ranges[ri].first_token, ri)
2365-
# This isn't quite accurate
2366-
emit_diagnostic(ps, startpos, macro_name_position, error="Invalid macro name")
2357+
# TODO: This isn't quite accurate
2358+
emit_diagnostic(ps, macro_name_position, macro_name_position,
2359+
error="invalid macro name")
23672360
end
23682361
end
23692362
reset_node!(ps, macro_name_position, kind=name_kind)
@@ -2383,10 +2376,11 @@ function parse_macro_name(ps::ParseState)
23832376
k = peek(ps)
23842377
parse_atom(ps, false)
23852378
if k == K"("
2386-
emit_diagnostic(ps, mark, warning="parenthesizing macro names is unnecessary")
2379+
emit_diagnostic(ps, mark,
2380+
warning="parenthesizing macro names is unnecessary")
23872381
elseif !(peek_behind(ps).kind in KSet"Identifier var")
23882382
# @[x] y z ==> (macrocall (error (vect x)) y z)
2389-
emit(ps, mark, K"error", error="Invalid macro name")
2383+
emit(ps, mark, K"error", error="invalid macro name")
23902384
end
23912385
end
23922386

test/diagnostics.jl

Lines changed: 33 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -1,14 +1,44 @@
1-
function diagnostic(str; allow_multiple=false)
1+
function diagnostic(str; only_first=false, allow_multiple=false)
22
stream = ParseStream(str)
33
parse!(stream)
44
if allow_multiple
55
stream.diagnostics
66
else
7-
@test length(stream.diagnostics) == 1
8-
only(stream.diagnostics)
7+
if !only_first
8+
@test length(stream.diagnostics) == 1
9+
end
10+
return stream.diagnostics[1]
911
end
1012
end
1113

14+
@testset "parser errors" begin
15+
@test diagnostic("+ #==# (a,b)") ==
16+
Diagnostic(2, 7, :error, "whitespace not allowed between prefix function call and argument list")
17+
@test diagnostic("[email protected]", only_first=true) ==
18+
Diagnostic(3, 4, :error, "`@` must appear on first or last macro name component")
19+
@test diagnostic("@M.(x)") ==
20+
Diagnostic(1, 3, :error, "dot call syntax not supported for macros")
21+
22+
@test diagnostic("try x end") ==
23+
Diagnostic(1, 9, :error, "try without catch or finally")
24+
# TODO: better range
25+
@test diagnostic("@A.\$x a") ==
26+
Diagnostic(6, 5, :error, "invalid macro name")
27+
end
28+
29+
@testset "parser warnings" begin
30+
@test diagnostic("@(A)", only_first=true) ==
31+
Diagnostic(2, 4, :warning, "parenthesizing macro names is unnecessary")
32+
@test diagnostic("try finally catch a ; b end") ==
33+
Diagnostic(13, 23, :warning, "`catch` after `finally` will execute out of order")
34+
@test diagnostic("import . .A") ==
35+
Diagnostic(9, 10, :warning, "space between dots in import path")
36+
@test diagnostic("import A .==") ==
37+
Diagnostic(9, 9, :warning, "space between dots in import path")
38+
@test diagnostic("import A.:+") ==
39+
Diagnostic(10, 10, :warning, "quoting with `:` in import is unnecessary")
40+
end
41+
1242
@testset "diagnostics for literal parsing" begin
1343
# Float overflow/underflow
1444
@test diagnostic("x = 10.0e1000;") ==

test/parser.jl

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -385,6 +385,7 @@ tests = [
385385
"A.B.@x" => "(macrocall (. (. A (quote B)) (quote @x)))"
386386
"@A.B.x" => "(macrocall (. (. A (quote B)) (quote @x)))"
387387
"[email protected]" => "(macrocall (. (. A (quote B)) (error-t) (quote @x)))"
388+
"@M.(x)" => "(macrocall (dotcall @M (error-t) x))"
388389
"f.(a,b)" => "(dotcall f a b)"
389390
"f.(a=1; b=2)" => "(dotcall f (= a 1) (parameters (= b 2)))" =>
390391
Expr(:., :f, Expr(:tuple, Expr(:parameters, Expr(:kw, :b, 2)), Expr(:kw, :a, 1)))

0 commit comments

Comments
 (0)