Skip to content

Commit d0f7c03

Browse files
Fix delimiter metadata for single quote atoms and remote calls (#13966)
1 parent ee90126 commit d0f7c03

File tree

4 files changed: +69 -28 lines changed

lib/elixir/src/elixir_parser.yrl

Lines changed: 24 additions & 11 deletions
Original file line numberDiff line numberDiff line change
@@ -284,9 +284,9 @@ access_expr -> list_heredoc : build_list_heredoc('$1').
284284
access_expr -> bitstring : '$1'.
285285
access_expr -> sigil : build_sigil('$1').
286286
access_expr -> atom : handle_literal(?exprs('$1'), '$1').
287-
access_expr -> atom_quoted : handle_literal(?exprs('$1'), '$1', delimiter(<<$">>)).
288-
access_expr -> atom_safe : build_quoted_atom('$1', true, delimiter(<<$">>)).
289-
access_expr -> atom_unsafe : build_quoted_atom('$1', false, delimiter(<<$">>)).
287+
access_expr -> atom_quoted : handle_literal(?exprs('$1'), '$1', atom_delimiter('$1')).
288+
access_expr -> atom_safe : build_quoted_atom('$1', true, atom_delimiter('$1')).
289+
access_expr -> atom_unsafe : build_quoted_atom('$1', false, atom_delimiter('$1')).
290290
access_expr -> dot_alias : '$1'.
291291
access_expr -> parens_call : '$1'.
292292

@@ -553,12 +553,12 @@ call_args_parens -> open_paren call_args_parens_base ',' kw_call close_paren :
553553

554554
% KV
555555

556-
kw_eol -> kw_identifier : handle_literal(?exprs('$1'), '$1', [{format, keyword}]).
557-
kw_eol -> kw_identifier eol : handle_literal(?exprs('$1'), '$1', [{format, keyword}]).
558-
kw_eol -> kw_identifier_safe : build_quoted_atom('$1', true, [{format, keyword}]).
559-
kw_eol -> kw_identifier_safe eol : build_quoted_atom('$1', true, [{format, keyword}]).
560-
kw_eol -> kw_identifier_unsafe : build_quoted_atom('$1', false, [{format, keyword}]).
561-
kw_eol -> kw_identifier_unsafe eol : build_quoted_atom('$1', false, [{format, keyword}]).
556+
kw_eol -> kw_identifier : handle_literal(?exprs('$1'), '$1', kw_identifier_meta('$1')).
557+
kw_eol -> kw_identifier eol : handle_literal(?exprs('$1'), '$1', kw_identifier_meta('$1')).
558+
kw_eol -> kw_identifier_safe : build_quoted_atom('$1', true, kw_identifier_meta('$1')).
559+
kw_eol -> kw_identifier_safe eol : build_quoted_atom('$1', true, kw_identifier_meta('$1')).
560+
kw_eol -> kw_identifier_unsafe : build_quoted_atom('$1', false, kw_identifier_meta('$1')).
561+
kw_eol -> kw_identifier_unsafe eol : build_quoted_atom('$1', false, kw_identifier_meta('$1')).
562562

563563
kw_base -> kw_eol container_expr : [{'$1', '$2'}].
564564
kw_base -> kw_base ',' kw_eol container_expr : [{'$3', '$4'} | '$1'].
@@ -892,8 +892,8 @@ build_dot(Dot, Left, {_, Location, _} = Right) ->
892892
Meta = meta_from_token(Dot),
893893
IdentifierMeta0 = meta_from_location(Location),
894894
IdentifierMeta1 =
895-
case Dot of
896-
{'.', {_Line, _Column, Delimiter}} when Delimiter =/= nil ->
895+
case Location of
896+
{_Line, _Column, Delimiter} when is_integer(Delimiter) ->
897897
delimiter(<<Delimiter>>) ++ IdentifierMeta0;
898898
_ ->
899899
IdentifierMeta0
@@ -1033,6 +1033,19 @@ build_quoted_atom({_, Location, Args}, Safe, ExtraMeta) ->
10331033
binary_to_atom_op(true) -> binary_to_existing_atom;
10341034
binary_to_atom_op(false) -> binary_to_atom.
10351035

1036+
atom_delimiter({_Kind, {_Line, _Column, Delimiter}, _Args}) ->
1037+
case ?token_metadata() of
1038+
true -> [{delimiter, <<Delimiter>>}];
1039+
false -> []
1040+
end.
1041+
1042+
kw_identifier_meta({_Kind, {_Line, _Column, Delimiter}, _Args}) ->
1043+
Meta = [{format, keyword}],
1044+
case ?token_metadata() of
1045+
true when is_integer(Delimiter) -> [{delimiter, <<Delimiter>>} | Meta];
1046+
_ -> Meta
1047+
end.
1048+
10361049
charlist_parts(Parts) ->
10371050
[charlist_part(Part) || Part <- Parts].
10381051
charlist_part(Binary) when is_binary(Binary) ->

lib/elixir/src/elixir_tokenizer.erl

Lines changed: 12 additions & 13 deletions
Original file line numberDiff line numberDiff line change
@@ -511,7 +511,7 @@ tokenize([$:, H | T] = Original, Line, Column, Scope, Tokens) when ?is_quote(H)
511511
{ok, [Part]} when is_binary(Part) ->
512512
case unsafe_to_atom(Part, Line, Column, Scope) of
513513
{ok, Atom} ->
514-
Token = {atom_quoted, {Line, Column, nil}, Atom},
514+
Token = {atom_quoted, {Line, Column, H}, Atom},
515515
tokenize(Rest, NewLine, NewColumn, NewScope, [Token | Tokens]);
516516

517517
{error, Reason} ->
@@ -523,7 +523,7 @@ tokenize([$:, H | T] = Original, Line, Column, Scope, Tokens) when ?is_quote(H)
523523
true -> atom_safe;
524524
false -> atom_unsafe
525525
end,
526-
Token = {Key, {Line, Column, nil}, Unescaped},
526+
Token = {Key, {Line, Column, H}, Unescaped},
527527
tokenize(Rest, NewLine, NewColumn, NewScope, [Token | Tokens]);
528528

529529
{error, Reason} ->
@@ -797,7 +797,7 @@ handle_strings(T, Line, Column, H, Scope, Tokens) ->
797797
{ok, [Part]} when is_binary(Part) ->
798798
case unsafe_to_atom(Part, Line, Column - 1, Scope) of
799799
{ok, Atom} ->
800-
Token = {kw_identifier, {Line, Column - 1, nil}, Atom},
800+
Token = {kw_identifier, {Line, Column - 1, H}, Atom},
801801
tokenize(Rest, NewLine, NewColumn + 1, NewScope, [Token | Tokens]);
802802
{error, Reason} ->
803803
error(Reason, Rest, NewScope, Tokens)
@@ -808,7 +808,7 @@ handle_strings(T, Line, Column, H, Scope, Tokens) ->
808808
true -> kw_identifier_safe;
809809
false -> kw_identifier_unsafe
810810
end,
811-
Token = {Key, {Line, Column - 1, nil}, Unescaped},
811+
Token = {Key, {Line, Column - 1, H}, Unescaped},
812812
tokenize(Rest, NewLine, NewColumn + 1, NewScope, [Token | Tokens]);
813813

814814
{error, Reason} ->
@@ -918,9 +918,8 @@ handle_dot([$., H | T] = Original, Line, Column, DotInfo, Scope, Tokens) when ?i
918918

919919
case unsafe_to_atom(UnescapedPart, Line, Column, NewScope) of
920920
{ok, Atom} ->
921-
Token = check_call_identifier(Line, Column, Part, Atom, Rest),
922-
DotInfo1 = setelement(3, DotInfo, $"),
923-
TokensSoFar = add_token_with_eol({'.', DotInfo1}, Tokens),
921+
Token = check_call_identifier(Line, Column, H, Atom, Rest),
922+
TokensSoFar = add_token_with_eol({'.', DotInfo}, Tokens),
924923
tokenize(Rest, NewLine, NewColumn, NewScope, [Token | TokensSoFar]);
925924

926925
{error, Reason} ->
@@ -1331,12 +1330,12 @@ tokenize_alias(Rest, Line, Column, Unencoded, Atom, Length, Ascii, Special, Scop
13311330

13321331
%% Check if it is a call identifier (paren | bracket | do)
13331332

1334-
check_call_identifier(Line, Column, Unencoded, Atom, [$( | _]) ->
1335-
{paren_identifier, {Line, Column, Unencoded}, Atom};
1336-
check_call_identifier(Line, Column, Unencoded, Atom, [$[ | _]) ->
1337-
{bracket_identifier, {Line, Column, Unencoded}, Atom};
1338-
check_call_identifier(Line, Column, Unencoded, Atom, _Rest) ->
1339-
{identifier, {Line, Column, Unencoded}, Atom}.
1333+
check_call_identifier(Line, Column, Info, Atom, [$( | _]) ->
1334+
{paren_identifier, {Line, Column, Info}, Atom};
1335+
check_call_identifier(Line, Column, Info, Atom, [$[ | _]) ->
1336+
{bracket_identifier, {Line, Column, Info}, Atom};
1337+
check_call_identifier(Line, Column, Info, Atom, _Rest) ->
1338+
{identifier, {Line, Column, Info}, Atom}.
13401339

13411340
add_token_with_eol({unary_op, _, _} = Left, T) -> [Left | T];
13421341
add_token_with_eol(Left, [{eol, _} | T]) -> [Left | T];

lib/elixir/test/elixir/kernel/parser_test.exs

Lines changed: 32 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -128,17 +128,46 @@ defmodule Kernel.ParserTest do
128128
end
129129

130130
test "handles graphemes inside quoted identifiers" do
131+
string_to_quoted =
132+
fn code ->
133+
Code.string_to_quoted!(code,
134+
token_metadata: true,
135+
literal_encoder: &{:ok, {:__block__, &2, [&1]}},
136+
emit_warnings: false
137+
)
138+
end
139+
131140
assert {
132141
{:., _, [{:foo, _, nil}, :"➡️"]},
133142
[no_parens: true, delimiter: ~S["], line: 1],
134143
[]
135-
} = Code.string_to_quoted!(~S|foo."➡️"|, token_metadata: true)
144+
} = string_to_quoted.(~S|foo."➡️"|)
136145

137146
assert {
138147
{:., _, [{:foo, _, nil}, :"➡️"]},
139-
[closing: [line: 1], delimiter: ~S["], line: 1],
148+
[no_parens: true, delimiter: ~S['], line: 1],
140149
[]
141-
} = Code.string_to_quoted!(~S|foo."➡️"()|, token_metadata: true)
150+
} = string_to_quoted.(~S|foo.'➡️'|)
151+
152+
assert {:__block__, [delimiter: ~S["], line: 1], [:"➡️"]} = string_to_quoted.(~S|:"➡️"|)
153+
154+
assert {:__block__, [delimiter: ~S['], line: 1], [:"➡️"]} = string_to_quoted.(~S|:'➡️'|)
155+
156+
assert {:__block__, [closing: [line: 1], line: 1],
157+
[
158+
[
159+
{{:__block__, [delimiter: ~S["], format: :keyword, line: 1], [:"➡️"]},
160+
{:x, [line: 1], nil}}
161+
]
162+
]} = string_to_quoted.(~S|["➡️": x]|)
163+
164+
assert {:__block__, [closing: [line: 1], line: 1],
165+
[
166+
[
167+
{{:__block__, [delimiter: ~S['], format: :keyword, line: 1], [:"➡️"]},
168+
{:x, [line: 1], nil}}
169+
]
170+
]} = string_to_quoted.(~S|['➡️': x]|)
142171
end
143172
end
144173

lib/elixir/test/erlang/tokenizer_test.erl

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -61,7 +61,7 @@ unquoted_atom_test() ->
6161
[{atom, {1, 1, _}, '&&'}] = tokenize(":&&").
6262

6363
quoted_atom_test() ->
64-
[{atom_quoted, {1, 1, nil}, 'foo bar'}] = tokenize(":\"foo bar\"").
64+
[{atom_quoted, {1, 1, $"}, 'foo bar'}] = tokenize(":\"foo bar\"").
6565

6666
oversized_atom_test() ->
6767
OversizedAtom = string:copies("a", 256),

0 commit comments

Comments
 (0)