Skip to content

Commit b92fc5e

Browse files
authored
Replace K"true" and K"false" with K"Bool" (#488)
Use a single `K"Bool"` Kind for booleans. This is both more convenient and more consistent with other literal kinds such as K"Integer" which group all integers under a single kind. Replace the use of the invisible `K"false"` token in catch blocks with a new kind `K"Placeholder"` - this removes the last of the invisible tokens, other than error tokens! K"Placeholder" will also be really useful for JuliaLowering as a kind for all-underscore identifiers.
1 parent abf099e commit b92fc5e

File tree

8 files changed

+50
-34
lines changed

8 files changed

+50
-34
lines changed

src/expr.jl

Lines changed: 4 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -326,6 +326,10 @@ function _internal_node_to_Expr(source, srcrange, head, childranges, childheads,
326326
args = Any[args[1], a2a...]
327327
end
328328
end
329+
elseif k == K"catch"
330+
if kind(childheads[1]) == K"Placeholder"
331+
args[1] = false
332+
end
329333
elseif k == K"try"
330334
# Try children in source order:
331335
# try_block catch_var catch_block else_block finally_block

src/kinds.jl

Lines changed: 6 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -197,6 +197,7 @@ register_kinds!(JuliaSyntax, 0, [
197197
# Identifiers
198198
"BEGIN_IDENTIFIERS"
199199
"Identifier"
200+
"Placeholder" # Used for empty catch variables, and all-underscore identifiers in lowering
200201
# Macro names are modelled as special kinds of identifiers because the full
201202
# macro name may not appear as characters in the source: The `@` may be
202203
# detached from the macro name as in `@A.x` (ugh!!), or have a _str or _cmd
@@ -253,6 +254,7 @@ register_kinds!(JuliaSyntax, 0, [
253254
"END_KEYWORDS"
254255

255256
"BEGIN_LITERAL"
257+
"Bool"
256258
"Integer"
257259
"BinInt"
258260
"HexInt"
@@ -262,8 +264,6 @@ register_kinds!(JuliaSyntax, 0, [
262264
"String"
263265
"Char"
264266
"CmdString"
265-
"true"
266-
"false"
267267
"END_LITERAL"
268268

269269
"BEGIN_DELIMITERS"
@@ -1067,7 +1067,7 @@ register_kinds!(JuliaSyntax, 0, [
10671067

10681068
# Special tokens
10691069
"TOMBSTONE" # Empty placeholder for kind to be filled later
1070-
"None" # Placeholder; never emitted by lexer
1070+
"None" # Never emitted by lexer/parser
10711071
"EndMarker" # EOF
10721072

10731073
"BEGIN_ERRORS"
@@ -1097,6 +1097,7 @@ const _nonunique_kind_names = Set([
10971097
K"Whitespace"
10981098
K"NewlineWs"
10991099
K"Identifier"
1100+
K"Placeholder"
11001101

11011102
K"ErrorEofMultiComment"
11021103
K"ErrorInvalidNumericConstant"
@@ -1169,6 +1170,7 @@ const _token_error_descriptions = Dict{Kind, String}(
11691170

11701171
#-------------------------------------------------------------------------------
11711172
# Predicates
1173+
is_identifier(k::Kind) = K"BEGIN_IDENTIFIERS" <= k <= K"END_IDENTIFIERS"
11721174
is_contextual_keyword(k::Kind) = K"BEGIN_CONTEXTUAL_KEYWORDS" <= k <= K"END_CONTEXTUAL_KEYWORDS"
11731175
is_error(k::Kind) = K"BEGIN_ERRORS" <= k <= K"END_ERRORS" || k == K"ErrorInvalidOperator" || k == K"Error**"
11741176
is_keyword(k::Kind) = K"BEGIN_KEYWORDS" <= k <= K"END_KEYWORDS"
@@ -1177,6 +1179,7 @@ is_literal(k::Kind) = K"BEGIN_LITERAL" <= k <= K"END_LITERAL"
11771179
is_operator(k::Kind) = K"BEGIN_OPS" <= k <= K"END_OPS"
11781180
is_word_operator(k::Kind) = (k == K"in" || k == K"isa" || k == K"where")
11791181

1182+
is_identifier(k) = is_identifier(kind(k))
11801183
is_contextual_keyword(k) = is_contextual_keyword(kind(k))
11811184
is_error(k) = is_error(kind(k))
11821185
is_keyword(k) = is_keyword(kind(k))

src/literal_parsing.jl

Lines changed: 3 additions & 5 deletions
Original file line numberDiff line numberDiff line change
@@ -406,10 +406,8 @@ function parse_julia_literal(txtbuf::Vector{UInt8}, head::SyntaxHead, srcrange)
406406
last(srcrange)+1, Diagnostic[])
407407
end
408408
return had_error ? ErrorVal() : String(take!(io))
409-
elseif k == K"true"
410-
return true
411-
elseif k == K"false"
412-
return false
409+
elseif k == K"Bool"
410+
return txtbuf[first(srcrange)] == u8"t"
413411
end
414412

415413
# TODO: Avoid allocating temporary String here
@@ -418,7 +416,7 @@ function parse_julia_literal(txtbuf::Vector{UInt8}, head::SyntaxHead, srcrange)
418416
parse_int_literal(val_str)
419417
elseif k in KSet"BinInt OctInt HexInt"
420418
parse_uint_literal(val_str, k)
421-
elseif k == K"Identifier"
419+
elseif k == K"Identifier" || k == K"Placeholder"
422420
if has_flags(head, RAW_STRING_FLAG)
423421
io = IOBuffer()
424422
unescape_raw_string(io, txtbuf, first(srcrange), last(srcrange)+1, false)

src/parser.jl

Lines changed: 6 additions & 6 deletions
Original file line numberDiff line numberDiff line change
@@ -2254,15 +2254,15 @@ function parse_try(ps)
22542254
if peek(ps) == K"else"
22552255
# catch-else syntax: https://github.com/JuliaLang/julia/pull/42211
22562256
#
2257-
#v1.8: try catch ; else end ==> (try (block) (catch false (block)) (else (block)))
2257+
#v1.8: try catch ; else end ==> (try (block) (catch □ (block)) (else (block)))
22582258
else_mark = position(ps)
22592259
bump(ps, TRIVIA_FLAG)
22602260
parse_block(ps)
22612261
if !has_catch
22622262
#v1.8: try else x finally y end ==> (try (block) (else (error (block x))) (finally (block y)))
22632263
emit(ps, else_mark, K"error", error="Expected `catch` before `else`")
22642264
end
2265-
#v1.7: try catch ; else end ==> (try (block) (catch false (block)) (else (error (block))))
2265+
#v1.7: try catch ; else end ==> (try (block) (catch □ (block)) (else (error (block))))
22662266
min_supported_version(v"1.8", ps, else_mark, "`else` after `catch`")
22672267
emit(ps, else_mark, K"else")
22682268
end
@@ -2302,10 +2302,10 @@ function parse_catch(ps::ParseState)
23022302
bump(ps, TRIVIA_FLAG)
23032303
k = peek(ps)
23042304
if k in KSet"NewlineWs ;" || is_closing_token(ps, k)
2305-
# try x catch end ==> (try (block x) (catch false (block)))
2306-
# try x catch ; y end ==> (try (block x) (catch false (block y)))
2307-
# try x catch \n y end ==> (try (block x) (catch false (block y)))
2308-
bump_invisible(ps, K"false")
2305+
# try x catch end ==> (try (block x) (catch □ (block)))
2306+
# try x catch ; y end ==> (try (block x) (catch □ (block y)))
2307+
# try x catch \n y end ==> (try (block x) (catch □ (block y)))
2308+
bump_invisible(ps, K"Placeholder")
23092309
else
23102310
# try x catch e y end ==> (try (block x) (catch e (block y)))
23112311
# try x catch $e y end ==> (try (block x) (catch ($ e) (block y)))

src/syntax_tree.jl

Lines changed: 15 additions & 4 deletions
Original file line numberDiff line numberDiff line change
@@ -125,6 +125,19 @@ byte_range(node::AbstractSyntaxNode) = node.position:(node.position + span(node)
125125

126126
sourcefile(node::AbstractSyntaxNode) = node.source
127127

128+
function leaf_string(ex)
129+
if !is_leaf(ex)
130+
throw(ArgumentError("_value_string should be used for leaf nodes only"))
131+
end
132+
k = kind(ex)
133+
value = ex.val
134+
# TODO: Dispatch on kind extension module (??)
135+
return k == K"Placeholder" ? "□"*string(value) :
136+
is_identifier(k) ? string(value) :
137+
value isa Symbol ? string(value) : # see parse_julia_literal for other cases which go here
138+
repr(value)
139+
end
140+
128141
function _show_syntax_node(io, current_filename, node::AbstractSyntaxNode,
129142
indent, show_byte_offsets)
130143
fname = filename(node)
@@ -134,8 +147,7 @@ function _show_syntax_node(io, current_filename, node::AbstractSyntaxNode,
134147
posstr *= "$(lpad(first_byte(node),6)):$(rpad(last_byte(node),6))"
135148
end
136149
val = node.val
137-
nodestr = !is_leaf(node) ? "[$(untokenize(head(node)))]" :
138-
isa(val, Symbol) ? string(val) : repr(val)
150+
nodestr = is_leaf(node) ? leaf_string(node) : "[$(untokenize(head(node)))]"
139151
treestr = string(indent, nodestr)
140152
# Add filename if it's changed from the previous node
141153
if fname != current_filename[]
@@ -157,8 +169,7 @@ function _show_syntax_node_sexpr(io, node::AbstractSyntaxNode)
157169
if is_error(node)
158170
print(io, "(", untokenize(head(node)), ")")
159171
else
160-
val = node.val
161-
print(io, val isa Symbol ? string(val) : repr(val))
172+
print(io, leaf_string(node))
162173
end
163174
else
164175
print(io, "(", untokenize(head(node)))

src/tokenize.jl

Lines changed: 6 additions & 4 deletions
Original file line numberDiff line numberDiff line change
@@ -1319,8 +1319,10 @@ function lex_identifier(l::Lexer, c)
13191319

13201320
if n > MAX_KW_LENGTH
13211321
emit(l, K"Identifier")
1322+
elseif h == _true_hash || h == _false_hash
1323+
emit(l, K"Bool")
13221324
else
1323-
emit(l, get(kw_hash, h, K"Identifier"))
1325+
emit(l, get(_kw_hash, h, K"Identifier"))
13241326
end
13251327
end
13261328

@@ -1374,8 +1376,6 @@ K"while",
13741376
K"in",
13751377
K"isa",
13761378
K"where",
1377-
K"true",
1378-
K"false",
13791379

13801380
K"abstract",
13811381
K"as",
@@ -1387,6 +1387,8 @@ K"type",
13871387
K"var",
13881388
]
13891389

1390-
const kw_hash = Dict(simple_hash(lowercase(string(kw))) => kw for kw in kws)
1390+
const _true_hash = simple_hash("true")
1391+
const _false_hash = simple_hash("false")
1392+
const _kw_hash = Dict(simple_hash(lowercase(string(kw))) => kw for kw in kws)
13911393

13921394
end # module

test/parser.jl

Lines changed: 5 additions & 5 deletions
Original file line numberDiff line numberDiff line change
@@ -635,18 +635,18 @@ tests = [
635635
"(try (block x) (catch e (block y)) (finally (block z)))"
636636
((v=v"1.8",), "try \n x \n catch e \n y \n else z finally \n w end") =>
637637
"(try (block x) (catch e (block y)) (else (block z)) (finally (block w)))"
638-
"try x catch end" => "(try (block x) (catch false (block)))"
639-
"try x catch ; y end" => "(try (block x) (catch false (block y)))"
640-
"try x catch \n y end" => "(try (block x) (catch false (block y)))"
638+
"try x catch end" => "(try (block x) (catch □ (block)))"
639+
"try x catch ; y end" => "(try (block x) (catch □ (block y)))"
640+
"try x catch \n y end" => "(try (block x) (catch □ (block y)))"
641641
"try x catch e y end" => "(try (block x) (catch e (block y)))"
642642
"try x catch \$e y end" => "(try (block x) (catch (\$ e) (block y)))"
643643
"try x catch var\"#\" y end" => "(try (block x) (catch (var #) (block y)))"
644644
"try x catch e+3 y end" => "(try (block x) (catch (error (call-i e + 3)) (block y)))"
645645
"try x finally y end" => "(try (block x) (finally (block y)))"
646646
# v1.8 only
647-
((v=v"1.8",), "try catch ; else end") => "(try (block) (catch false (block)) (else (block)))"
647+
((v=v"1.8",), "try catch ; else end") => "(try (block) (catch □ (block)) (else (block)))"
648648
((v=v"1.8",), "try else x finally y end") => "(try (block) (else (error (block x))) (finally (block y)))"
649-
((v=v"1.7",), "try catch ; else end") => "(try (block) (catch false (block)) (else (error (block))))"
649+
((v=v"1.7",), "try catch ; else end") => "(try (block) (catch □ (block)) (else (error (block))))"
650650
# finally before catch :-(
651651
"try x finally y catch e z end" => "(try (block x) (finally (block y)) (catch e (block z)))"
652652
"try x end" => "(try (block x) (error-t))"

test/tokenize.jl

Lines changed: 5 additions & 7 deletions
Original file line numberDiff line numberDiff line change
@@ -198,9 +198,10 @@ end
198198
end
199199

200200
@testset "tokenizing true/false literals" begin
201-
@test tok("somtext true", 3).kind == K"true"
202-
@test tok("somtext false", 3).kind == K"false"
201+
@test tok("somtext true", 3).kind == K"Bool"
202+
@test tok("somtext false", 3).kind == K"Bool"
203203
@test tok("somtext tr", 3).kind == K"Identifier"
204+
@test tok("somtext truething", 3).kind == K"Identifier"
204205
@test tok("somtext falsething", 3).kind == K"Identifier"
205206
end
206207

@@ -962,9 +963,6 @@ const all_kws = Set([
962963
"primitive",
963964
"type",
964965
"var",
965-
# Literals
966-
"true",
967-
"false",
968966
# Word-like operators
969967
"in",
970968
"isa",
@@ -974,14 +972,14 @@ const all_kws = Set([
974972
function check_kw_hashes(iter)
975973
for cs in iter
976974
str = String([cs...])
977-
if Tokenize.simple_hash(str) in keys(Tokenize.kw_hash)
975+
if Tokenize.simple_hash(str) in keys(Tokenize._kw_hash)
978976
@test str in all_kws
979977
end
980978
end
981979
end
982980

983981
@testset "simple_hash" begin
984-
@test length(all_kws) == length(Tokenize.kw_hash)
982+
@test length(all_kws) == length(Tokenize._kw_hash)
985983

986984
@testset "Length $len keywords" for len in 1:5
987985
check_kw_hashes(String([cs...]) for cs in Iterators.product(['a':'z' for _ in 1:len]...))

0 commit comments

Comments
 (0)