Skip to content

Commit d2e61f0

Browse files
authored
Fix premature lowering of cmd strings (#480)
Unadorned backtick command syntax was prematurely lowered to a macrocall in the parser with the `core_@cmd` macro name. Remove this special macro name (almost the last of the special zero-width tokens to be removed!) and rely instead on the presence of unadorned `cmdstring` to do the lowering to a `Core.@cmd` call later during Expr conversion. Also clean up some `Kind`s, grouping them more sensibly and removing the obsolete kinds `K"core_@int128_str"`, `K"core_@uint128_str"` and `K"core_@big_str"`.
1 parent ebda082 commit d2e61f0

File tree

7 files changed

+90
-85
lines changed

7 files changed

+90
-85
lines changed

docs/src/reference.md

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -48,6 +48,7 @@ the source text more closely.
4848
* Docstrings use the `K"doc"` kind, and are not lowered to `Core.@doc` until later (#217)
4949
* Juxtaposition uses the `K"juxtapose"` kind rather than lowering immediately to `*` (#220)
5050
* `return` without a value has zero children, rather than lowering to `return nothing` (#220)
51+
* Command syntax `` `foo` `` parses into a `cmdstring` tree node wrapping the string, as `(cmdstring "foo")` (#438). These are lowered to a macro call later rather than by the parser.
5152

5253
### Containers for string-like constructs
5354

src/expr.jl

Lines changed: 16 additions & 7 deletions
Original file line numberDiff line numberDiff line change
@@ -70,9 +70,7 @@ end
7070

7171
function _leaf_to_Expr(source, txtbuf, head, srcrange, node)
7272
k = kind(head)
73-
if k == K"core_@cmd"
74-
return GlobalRef(Core, Symbol("@cmd"))
75-
elseif k == K"MacroName" && view(source, srcrange) == "."
73+
if k == K"MacroName" && view(source, srcrange) == "."
7674
return Symbol("@__dot__")
7775
elseif is_error(k)
7876
return k == K"error" ?
@@ -102,7 +100,7 @@ end
102100
#
103101
# This function concatenates adjacent string chunks together as done in the
104102
# reference parser.
105-
function _string_to_Expr(k, args)
103+
function _string_to_Expr(args)
106104
args2 = Any[]
107105
i = 1
108106
while i <= length(args)
@@ -140,7 +138,7 @@ function _string_to_Expr(k, args)
140138
# """\n a\n b""" ==> "a\nb"
141139
return only(args2)
142140
else
143-
# This only happens when k == K"string" or when an error has occurred.
141+
# This only happens when the kind is K"string" or when an error has occurred.
144142
return Expr(:string, args2...)
145143
end
146144
end
@@ -212,13 +210,17 @@ function _internal_node_to_Expr(source, srcrange, head, childranges, childheads,
212210
# K"var" and K"char" nodes, but this discounts having embedded error
213211
# nodes when ignore_errors=true is set.
214212
return args[1]
215-
elseif k == K"string" || k == K"cmdstring"
216-
return _string_to_Expr(k, args)
213+
elseif k == K"string"
214+
return _string_to_Expr(args)
217215
end
218216

219217
loc = source_location(LineNumberNode, source, first(srcrange))
220218
endloc = source_location(LineNumberNode, source, last(srcrange))
221219

220+
if k == K"cmdstring"
221+
return Expr(:macrocall, GlobalRef(Core, Symbol("@cmd")), loc, _string_to_Expr(args))
222+
end
223+
222224
_fixup_Expr_children!(head, loc, args)
223225

224226
headstr = untokenize(head, include_flag_suff=false)
@@ -229,6 +231,13 @@ function _internal_node_to_Expr(source, srcrange, head, childranges, childheads,
229231
if k == K"?"
230232
headsym = :if
231233
elseif k == K"macrocall"
234+
if length(args) == 2
235+
a2 = args[2]
236+
if @isexpr(a2, :macrocall) && kind(childheads[1]) == K"CmdMacroName"
237+
# Fix up for custom cmd macros like `` foo`x` ``
238+
args[2] = a2.args[3]
239+
end
240+
end
232241
do_lambda = _extract_do_lambda!(args)
233242
_reorder_parameters!(args, 2)
234243
insert!(args, 2, loc)

src/kinds.jl

Lines changed: 45 additions & 64 deletions
Original file line numberDiff line numberDiff line change
@@ -189,35 +189,24 @@ kind(k::Kind) = k
189189
#-------------------------------------------------------------------------------
190190
# Kinds used by JuliaSyntax
191191
register_kinds!(JuliaSyntax, 0, [
192-
"None" # Placeholder; never emitted by lexer
193-
"EndMarker" # EOF
192+
# Whitespace
194193
"Comment"
195194
"Whitespace"
196195
"NewlineWs" # newline-containing whitespace
197-
"Identifier"
198-
"@"
199-
","
200-
";"
201196

202-
"BEGIN_ERRORS"
203-
# Tokenization errors
204-
"ErrorEofMultiComment"
205-
"ErrorInvalidNumericConstant"
206-
"ErrorHexFloatMustContainP"
207-
"ErrorAmbiguousNumericConstant"
208-
"ErrorAmbiguousNumericDotMultiply"
209-
"ErrorInvalidInterpolationTerminator"
210-
"ErrorNumericOverflow"
211-
"ErrorInvalidEscapeSequence"
212-
"ErrorOverLongCharacter"
213-
"ErrorInvalidUTF8"
214-
"ErrorInvisibleChar"
215-
"ErrorIdentifierStart"
216-
"ErrorUnknownCharacter"
217-
"ErrorBidiFormatting"
218-
# Generic error
219-
"error"
220-
"END_ERRORS"
197+
# Identifiers
198+
"BEGIN_IDENTIFIERS"
199+
"Identifier"
200+
# Macro names are modelled as special kinds of identifiers because the full
201+
# macro name may not appear as characters in the source: The `@` may be
202+
# detached from the macro name as in `@A.x` (ugh!!), or have a _str or _cmd
203+
# suffix appended.
204+
"BEGIN_MACRO_NAMES"
205+
"MacroName"
206+
"StringMacroName"
207+
"CmdMacroName"
208+
"END_MACRO_NAMES"
209+
"END_IDENTIFIERS"
221210

222211
"BEGIN_KEYWORDS"
223212
"baremodule"
@@ -278,6 +267,12 @@ register_kinds!(JuliaSyntax, 0, [
278267
"END_LITERAL"
279268

280269
"BEGIN_DELIMITERS"
270+
# Punctuation
271+
"@"
272+
","
273+
";"
274+
275+
# Paired delimiters
281276
"["
282277
"]"
283278
"{"
@@ -1028,45 +1023,6 @@ register_kinds!(JuliaSyntax, 0, [
10281023
"END_UNICODE_OPS"
10291024
"END_OPS"
10301025

1031-
# The following kinds are emitted by the parser. There's two types of these:
1032-
1033-
# 1. Implied tokens which have a position but might have zero width in the
1034-
# source text.
1035-
#
1036-
# In some cases we want to generate parse tree nodes in a standard form,
1037-
# but some of the leaf tokens are implied rather than existing in the
1038-
# source text, or the lexed tokens need to be re-kinded to represent
1039-
# special forms which only the parser can infer. These are "parser tokens".
1040-
#
1041-
# Some examples:
1042-
#
1043-
# Docstrings - the macro name is invisible
1044-
# "doc" foo() = 1 ==> (macrocall (core @doc) . (= (call foo) 1))
1045-
#
1046-
# String macros - the macro name does not appear in the source text, so we
1047-
# need a special kind of token to imply it.
1048-
#
1049-
# In these cases, we use some special kinds which can be emitted as zero
1050-
# width tokens to keep the parse tree more uniform.
1051-
"BEGIN_PARSER_TOKENS"
1052-
1053-
"TOMBSTONE" # Empty placeholder for kind to be filled later
1054-
1055-
# Macro names are modelled as a special kind of identifier because the
1056-
# @ may not be attached to the macro name in the source (or may not be
1057-
# associated with a token at all in the case of implied macro calls
1058-
# like CORE_DOC_MACRO_NAME)
1059-
"BEGIN_MACRO_NAMES"
1060-
"MacroName"
1061-
"StringMacroName"
1062-
"CmdMacroName"
1063-
"core_@cmd"
1064-
"core_@int128_str"
1065-
"core_@uint128_str"
1066-
"core_@big_str"
1067-
"END_MACRO_NAMES"
1068-
"END_PARSER_TOKENS"
1069-
10701026
# 2. Nonterminals which are exposed in the AST, but where the surface
10711027
# syntax doesn't have a token corresponding to the node type.
10721028
"BEGIN_SYNTAX_KINDS"
@@ -1108,6 +1064,31 @@ register_kinds!(JuliaSyntax, 0, [
11081064
# Container for a single statement/atom plus any trivia and errors
11091065
"wrapper"
11101066
"END_SYNTAX_KINDS"
1067+
1068+
# Special tokens
1069+
"TOMBSTONE" # Empty placeholder for kind to be filled later
1070+
"None" # Placeholder; never emitted by lexer
1071+
"EndMarker" # EOF
1072+
1073+
"BEGIN_ERRORS"
1074+
# Tokenization errors
1075+
"ErrorEofMultiComment"
1076+
"ErrorInvalidNumericConstant"
1077+
"ErrorHexFloatMustContainP"
1078+
"ErrorAmbiguousNumericConstant"
1079+
"ErrorAmbiguousNumericDotMultiply"
1080+
"ErrorInvalidInterpolationTerminator"
1081+
"ErrorNumericOverflow"
1082+
"ErrorInvalidEscapeSequence"
1083+
"ErrorOverLongCharacter"
1084+
"ErrorInvalidUTF8"
1085+
"ErrorInvisibleChar"
1086+
"ErrorIdentifierStart"
1087+
"ErrorUnknownCharacter"
1088+
"ErrorBidiFormatting"
1089+
# Generic error
1090+
"error"
1091+
"END_ERRORS"
11111092
])
11121093

11131094
#-------------------------------------------------------------------------------

src/literal_parsing.jl

Lines changed: 0 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -438,8 +438,6 @@ function parse_julia_literal(txtbuf::Vector{UInt8}, head::SyntaxHead, srcrange)
438438
Symbol("@$(normalize_identifier(val_str))_str")
439439
elseif k == K"CmdMacroName"
440440
Symbol("@$(normalize_identifier(val_str))_cmd")
441-
elseif k == K"core_@cmd"
442-
Symbol("core_@cmd")
443441
elseif is_syntax_kind(head)
444442
nothing
445443
elseif is_keyword(k)

src/parser.jl

Lines changed: 3 additions & 5 deletions
Original file line numberDiff line numberDiff line change
@@ -3596,12 +3596,10 @@ function parse_atom(ps::ParseState, check_identifiers=true)
35963596
elseif is_string_delim(leading_kind)
35973597
parse_string(ps, false)
35983598
elseif leading_kind in KSet"` ```"
3599-
# `` ==> (macrocall core_@cmd (cmdstring-r ""))
3600-
# `cmd` ==> (macrocall core_@cmd (cmdstring-r "cmd"))
3601-
# ```cmd``` ==> (macrocall core_@cmd (cmdstring-s-r "cmd"))
3602-
bump_invisible(ps, K"core_@cmd")
3599+
# `` ==> (cmdstring-r "")
3600+
# `cmd` ==> (cmdstring-r "cmd")
3601+
# ```cmd``` ==> (cmdstring-s-r "cmd")
36033602
parse_string(ps, true)
3604-
emit(ps, mark, K"macrocall")
36053603
elseif is_literal(leading_kind)
36063604
# 42 ==> 42
36073605
bump(ps)

test/expr.jl

Lines changed: 18 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -663,6 +663,24 @@
663663
Expr(:macrocall, GlobalRef(Core, Symbol("@doc")), LineNumberNode(2), "x", :f)
664664
end
665665

666+
@testset "String and cmd macros" begin
667+
# Custom string macros
668+
@test parsestmt("foo\"str\"") ==
669+
Expr(:macrocall, Symbol("@foo_str"), LineNumberNode(1), "str")
670+
# Bare @cmd
671+
@test parsestmt("\n`str`") ==
672+
Expr(:macrocall, GlobalRef(Core, Symbol("@cmd")), LineNumberNode(2), "str")
673+
# Custom cmd macros
674+
@test parsestmt("foo`str`") ==
675+
Expr(:macrocall, Symbol("@foo_cmd"), LineNumberNode(1), "str")
676+
@test parsestmt("foo```\n a\n b```") ==
677+
Expr(:macrocall, Symbol("@foo_cmd"), LineNumberNode(1), "a\nb")
678+
# Expr conversion distinguishes from explicit calls to a macro of the same name
679+
@test parsestmt("@foo_cmd `str`") ==
680+
Expr(:macrocall, Symbol("@foo_cmd"), LineNumberNode(1),
681+
Expr(:macrocall, GlobalRef(Core, Symbol("@cmd")), LineNumberNode(1), "str"))
682+
end
683+
666684
@testset "return" begin
667685
@test parsestmt("return x") == Expr(:return, :x)
668686
@test parsestmt("return") == Expr(:return, nothing)

test/parser.jl

Lines changed: 7 additions & 7 deletions
Original file line numberDiff line numberDiff line change
@@ -847,9 +847,9 @@ tests = [
847847
# __dot__ macro
848848
"@. x" => "(macrocall @. x)"
849849
# cmd strings
850-
"``" => "(macrocall core_@cmd (cmdstring-r \"\"))"
851-
"`cmd`" => "(macrocall core_@cmd (cmdstring-r \"cmd\"))"
852-
"```cmd```" => "(macrocall core_@cmd (cmdstring-s-r \"cmd\"))"
850+
"``" => "(cmdstring-r \"\")"
851+
"`cmd`" => "(cmdstring-r \"cmd\")"
852+
"```cmd```" => "(cmdstring-s-r \"cmd\")"
853853
# literals
854854
"true" => "true"
855855
"42" => "42"
@@ -922,7 +922,7 @@ tests = [
922922
# Triple-quoted dedenting:
923923
"\"\"\"\nx\"\"\"" => raw"""(string-s "x")"""
924924
"\"\"\"\n\nx\"\"\"" => raw"""(string-s "\n" "x")"""
925-
"```\n x\n y```" => raw"""(macrocall core_@cmd (cmdstring-s-r "x\n" "y"))"""
925+
"```\n x\n y```" => raw"""(cmdstring-s-r "x\n" "y")"""
926926
# Various newlines (\n \r \r\n) and whitespace (' ' \t)
927927
"\"\"\"\n x\n y\"\"\"" => raw"""(string-s "x\n" "y")"""
928928
"\"\"\"\r x\r y\"\"\"" => raw"""(string-s "x\n" "y")"""
@@ -976,7 +976,7 @@ tests = [
976976
"'ab'" => "(char (ErrorOverLongCharacter))"
977977
"\"\xf5\"" => "(string (ErrorInvalidUTF8))"
978978
"'\xf5'" => "(char (ErrorInvalidUTF8))"
979-
"`\xf5`" => "(macrocall core_@cmd (cmdstring-r (ErrorInvalidUTF8)))"
979+
"`\xf5`" => "(cmdstring-r (ErrorInvalidUTF8))"
980980
"10.0e1000'" => "(ErrorNumericOverflow)"
981981
"10.0f100'" => "(ErrorNumericOverflow)"
982982
],
@@ -1053,8 +1053,8 @@ parsestmt_test_specs = [
10531053
# detecting raw vs non-raw strings. The old parser was tightly coupled to
10541054
# the lexer and the parser state was used to disambiguate these cases.
10551055
"x in' '" => "(call-i x in (char (error)))"
1056-
"x in'``\$" => "(call-i x in (call-i (juxtapose (char '`' (error-t)) (macrocall core_@cmd (cmdstring-r (error-t)))) \$ (error)))"
1057-
"var\"#\"`str`" => "(juxtapose (var # (error-t)) (macrocall core_@cmd (cmdstring-r \"str\")))"
1056+
"x in'``\$" => "(call-i x in (call-i (juxtapose (char '`' (error-t)) (cmdstring-r (error-t))) \$ (error)))"
1057+
"var\"#\"`str`" => "(juxtapose (var # (error-t)) (cmdstring-r \"str\"))"
10581058
"var\"#\"\"str\"" => "(juxtapose (var # (error-t)) (error-t) (string \"str\"))"
10591059
]
10601060

0 commit comments

Comments
 (0)