From c0ed20e7e03b23c8fc208598a3b677348f195318 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Jonatan=20K=C5=82osko?= <jonatanklosko@gmail.com> Date: Mon, 4 Nov 2024 23:22:30 +0800 Subject: [PATCH 1/2] Add delimiter meta to remote calls with quoted identifier --- lib/elixir/src/elixir_parser.yrl | 19 ++++++++++----- lib/elixir/src/elixir_tokenizer.erl | 24 +++++++++---------- lib/elixir/test/elixir/kernel/parser_test.exs | 12 +++++++++- lib/elixir/unicode/security.ex | 4 ++-- 4 files changed, 38 insertions(+), 21 deletions(-) diff --git a/lib/elixir/src/elixir_parser.yrl b/lib/elixir/src/elixir_parser.yrl index 8b28b2ff91c..b32fc533add 100644 --- a/lib/elixir/src/elixir_parser.yrl +++ b/lib/elixir/src/elixir_parser.yrl @@ -890,8 +890,15 @@ build_dot_container(Dot, Left, Right, Extra) -> build_dot(Dot, Left, {_, Location, _} = Right) -> Meta = meta_from_token(Dot), - IdentifierLocation = meta_from_location(Location), - {'.', Meta, IdentifierLocation, [Left, extract_identifier(Right)]}. + IdentifierMeta0 = meta_from_location(Location), + IdentifierMeta1 = + case Location of + {_Line, _Column, {_Unencoded, Delimiter}} when Delimiter =/= nil -> + delimiter(<<Delimiter/utf8>>) ++ IdentifierMeta0; + _ -> + IdentifierMeta0 + end, + {'.', Meta, IdentifierMeta1, [Left, extract_identifier(Right)]}. extract_identifier({Kind, _, Identifier}) when Kind == identifier; Kind == bracket_identifier; Kind == paren_identifier; @@ -916,8 +923,8 @@ build_no_parens_do_block(Expr, Args, {BlockMeta, Block}) -> build_no_parens(Expr, Args) -> build_call(Expr, Args). 
-build_identifier({'.', Meta, IdentifierLocation, DotArgs}) -> - {{'.', Meta, DotArgs}, [{no_parens, true} | IdentifierLocation], []}; +build_identifier({'.', Meta, IdentifierMeta, DotArgs}) -> + {{'.', Meta, DotArgs}, [{no_parens, true} | IdentifierMeta], []}; build_identifier({'.', Meta, _} = Dot) -> {Dot, [{no_parens, true} | Meta], []}; @@ -925,8 +932,8 @@ build_identifier({'.', Meta, _} = Dot) -> build_identifier({_, Location, Identifier}) -> {Identifier, meta_from_location(Location), nil}. -build_call({'.', Meta, IdentifierLocation, DotArgs}, Args) -> - {{'.', Meta, DotArgs}, IdentifierLocation, Args}; +build_call({'.', Meta, IdentifierMeta, DotArgs}, Args) -> + {{'.', Meta, DotArgs}, IdentifierMeta, Args}; build_call({'.', Meta, _} = Dot, Args) -> {Dot, Meta, Args}; diff --git a/lib/elixir/src/elixir_tokenizer.erl b/lib/elixir/src/elixir_tokenizer.erl index 45ee82c244e..6dee340926c 100644 --- a/lib/elixir/src/elixir_tokenizer.erl +++ b/lib/elixir/src/elixir_tokenizer.erl @@ -540,7 +540,7 @@ tokenize([$: | String] = Original, Line, Column, Scope, Tokens) -> {_Kind, Unencoded, Atom, Rest, Length, Ascii, _Special} -> NewScope = maybe_warn_for_ambiguous_bang_before_equals(atom, Unencoded, Rest, Line, Column, Scope), TrackedScope = track_ascii(Ascii, NewScope), - Token = {atom, {Line, Column, Unencoded}, Atom}, + Token = {atom, {Line, Column, {Unencoded, nil}}, Atom}, tokenize(Rest, Line, Column + 1 + Length, TrackedScope, [Token | Tokens]); empty when Scope#elixir_tokenizer.cursor_completion == false -> unexpected_token(Original, Line, Column, Scope, Tokens); @@ -651,7 +651,7 @@ tokenize(String, Line, Column, OriginalScope, Tokens) -> case Rest of [$: | T] when ?is_space(hd(T)) -> - Token = {kw_identifier, {Line, Column, Unencoded}, Atom}, + Token = {kw_identifier, {Line, Column, {Unencoded, nil}}, Atom}, tokenize(T, Line, Column + Length + 1, Scope, [Token | Tokens]); [$: | T] when hd(T) =/= $: -> @@ -671,7 +671,7 @@ tokenize(String, Line, Column, 
OriginalScope, Tokens) -> _ when Kind == identifier -> NewScope = maybe_warn_for_ambiguous_bang_before_equals(identifier, Unencoded, Rest, Line, Column, Scope), - Token = check_call_identifier(Line, Column, Unencoded, Atom, Rest), + Token = check_call_identifier(Line, Column, Unencoded, nil, Atom, Rest), tokenize(Rest, Line, Column + Length, NewScope, [Token | Tokens]); _ -> @@ -918,7 +918,7 @@ handle_dot([$., H | T] = Original, Line, Column, DotInfo, Scope, Tokens) when ?i case unsafe_to_atom(UnescapedPart, Line, Column, NewScope) of {ok, Atom} -> - Token = check_call_identifier(Line, Column, Part, Atom, Rest), + Token = check_call_identifier(Line, Column, Part, $", Atom, Rest), TokensSoFar = add_token_with_eol({'.', DotInfo}, Tokens), tokenize(Rest, NewLine, NewColumn, NewScope, [Token | TokensSoFar]); @@ -937,7 +937,7 @@ handle_dot([$. | Rest], Line, Column, DotInfo, Scope, Tokens) -> tokenize(Rest, Line, Column, Scope, TokensSoFar). handle_call_identifier(Rest, Line, Column, DotInfo, Length, UnencodedOp, Scope, Tokens) -> - Token = check_call_identifier(Line, Column, UnencodedOp, list_to_atom(UnencodedOp), Rest), + Token = check_call_identifier(Line, Column, UnencodedOp, nil, list_to_atom(UnencodedOp), Rest), TokensSoFar = add_token_with_eol({'.', DotInfo}, Tokens), tokenize(Rest, Line, Column + Length, Scope, [Token | TokensSoFar]). @@ -1324,18 +1324,18 @@ tokenize_alias(Rest, Line, Column, Unencoded, Atom, Length, Ascii, Special, Scop error(Reason, Unencoded ++ Rest, Scope, Tokens); true -> - AliasesToken = {alias, {Line, Column, Unencoded}, Atom}, + AliasesToken = {alias, {Line, Column, {Unencoded, nil}}, Atom}, tokenize(Rest, Line, Column + Length, Scope, [AliasesToken | Tokens]) end. 
%% Check if it is a call identifier (paren | bracket | do) -check_call_identifier(Line, Column, Unencoded, Atom, [$( | _]) -> - {paren_identifier, {Line, Column, Unencoded}, Atom}; -check_call_identifier(Line, Column, Unencoded, Atom, [$[ | _]) -> - {bracket_identifier, {Line, Column, Unencoded}, Atom}; -check_call_identifier(Line, Column, Unencoded, Atom, _Rest) -> - {identifier, {Line, Column, Unencoded}, Atom}. +check_call_identifier(Line, Column, Unencoded, Delimiter, Atom, [$( | _]) -> + {paren_identifier, {Line, Column, {Unencoded, Delimiter}}, Atom}; +check_call_identifier(Line, Column, Unencoded, Delimiter, Atom, [$[ | _]) -> + {bracket_identifier, {Line, Column, {Unencoded, Delimiter}}, Atom}; +check_call_identifier(Line, Column, Unencoded, Delimiter, Atom, _Rest) -> + {identifier, {Line, Column, {Unencoded, Delimiter}}, Atom}. add_token_with_eol({unary_op, _, _} = Left, T) -> [Left | T]; add_token_with_eol(Left, [{eol, _} | T]) -> [Left | T]; diff --git a/lib/elixir/test/elixir/kernel/parser_test.exs b/lib/elixir/test/elixir/kernel/parser_test.exs index 70a9188289e..b18f8a34982 100644 --- a/lib/elixir/test/elixir/kernel/parser_test.exs +++ b/lib/elixir/test/elixir/kernel/parser_test.exs @@ -128,7 +128,17 @@ defmodule Kernel.ParserTest do end test "handles graphemes inside quoted identifiers" do - assert {{:., _, [{:foo, _, nil}, :"➡️"]}, _, []} = Code.string_to_quoted!(~s|foo."➡️"|) + assert { + {:., _, [{:foo, _, nil}, :"➡️"]}, + [no_parens: true, delimiter: ~S["], line: 1], + [] + } = Code.string_to_quoted!(~S|foo."➡️"|, token_metadata: true) + + assert { + {:., _, [{:foo, _, nil}, :"➡️"]}, + [closing: [line: 1], delimiter: ~S["], line: 1], + [] + } = Code.string_to_quoted!(~S|foo."➡️"()|, token_metadata: true) end end diff --git a/lib/elixir/unicode/security.ex b/lib/elixir/unicode/security.ex index 90886d58565..2b2bb3ed117 100644 --- a/lib/elixir/unicode/security.ex +++ b/lib/elixir/unicode/security.ex @@ -40,7 +40,7 @@ defmodule 
String.Tokenizer.Security do ] defp check_token_for_confusability( - {kind, {_line, _column, [_ | _] = name} = info, _}, + {kind, {_line, _column, {[_ | _] = name, _delimiter}} = info, _}, skeletons ) when kind in @identifiers do @@ -50,7 +50,7 @@ defmodule String.Tokenizer.Security do {_, _, ^name} -> {:ok, skeletons} - {line, _, previous_name} when name != previous_name -> + {line, _, {previous_name, _delimiter}} when name != previous_name -> {:warn, "confusable identifier: '#{name}' looks like '#{previous_name}' on line #{line}, " <> "but they are written using different characters" <> dir_compare(name, previous_name)} From d138487c405d165e28f38f3e5dd1e0ebdeaf1fc6 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Jonatan=20K=C5=82osko?= <jonatanklosko@gmail.com> Date: Tue, 5 Nov 2024 19:34:01 +0800 Subject: [PATCH 2/2] Store delimiter in dot token instead --- lib/elixir/src/elixir_parser.yrl | 4 ++-- lib/elixir/src/elixir_tokenizer.erl | 27 ++++++++++++++------------- lib/elixir/unicode/security.ex | 4 ++-- 3 files changed, 18 insertions(+), 17 deletions(-) diff --git a/lib/elixir/src/elixir_parser.yrl b/lib/elixir/src/elixir_parser.yrl index b32fc533add..8777a44d742 100644 --- a/lib/elixir/src/elixir_parser.yrl +++ b/lib/elixir/src/elixir_parser.yrl @@ -892,8 +892,8 @@ build_dot(Dot, Left, {_, Location, _} = Right) -> Meta = meta_from_token(Dot), IdentifierMeta0 = meta_from_location(Location), IdentifierMeta1 = - case Location of - {_Line, _Column, {_Unencoded, Delimiter}} when Delimiter =/= nil -> + case Dot of + {'.', {_Line, _Column, Delimiter}} when Delimiter =/= nil -> delimiter(<<Delimiter/utf8>>) ++ IdentifierMeta0; _ -> IdentifierMeta0 diff --git a/lib/elixir/src/elixir_tokenizer.erl b/lib/elixir/src/elixir_tokenizer.erl index 6dee340926c..ebe290d35cc 100644 --- a/lib/elixir/src/elixir_tokenizer.erl +++ b/lib/elixir/src/elixir_tokenizer.erl @@ -540,7 +540,7 @@ tokenize([$: | String] = Original, Line, Column, Scope, Tokens) -> {_Kind, Unencoded, Atom, Rest, Length, Ascii, _Special} -> NewScope =
maybe_warn_for_ambiguous_bang_before_equals(atom, Unencoded, Rest, Line, Column, Scope), TrackedScope = track_ascii(Ascii, NewScope), - Token = {atom, {Line, Column, {Unencoded, nil}}, Atom}, + Token = {atom, {Line, Column, Unencoded}, Atom}, tokenize(Rest, Line, Column + 1 + Length, TrackedScope, [Token | Tokens]); empty when Scope#elixir_tokenizer.cursor_completion == false -> unexpected_token(Original, Line, Column, Scope, Tokens); @@ -651,7 +651,7 @@ tokenize(String, Line, Column, OriginalScope, Tokens) -> case Rest of [$: | T] when ?is_space(hd(T)) -> - Token = {kw_identifier, {Line, Column, {Unencoded, nil}}, Atom}, + Token = {kw_identifier, {Line, Column, Unencoded}, Atom}, tokenize(T, Line, Column + Length + 1, Scope, [Token | Tokens]); [$: | T] when hd(T) =/= $: -> @@ -671,7 +671,7 @@ tokenize(String, Line, Column, OriginalScope, Tokens) -> _ when Kind == identifier -> NewScope = maybe_warn_for_ambiguous_bang_before_equals(identifier, Unencoded, Rest, Line, Column, Scope), - Token = check_call_identifier(Line, Column, Unencoded, nil, Atom, Rest), + Token = check_call_identifier(Line, Column, Unencoded, Atom, Rest), tokenize(Rest, Line, Column + Length, NewScope, [Token | Tokens]); _ -> @@ -918,8 +918,9 @@ handle_dot([$., H | T] = Original, Line, Column, DotInfo, Scope, Tokens) when ?i case unsafe_to_atom(UnescapedPart, Line, Column, NewScope) of {ok, Atom} -> - Token = check_call_identifier(Line, Column, Part, $", Atom, Rest), - TokensSoFar = add_token_with_eol({'.', DotInfo}, Tokens), + Token = check_call_identifier(Line, Column, Part, Atom, Rest), + DotInfo1 = setelement(3, DotInfo, $"), + TokensSoFar = add_token_with_eol({'.', DotInfo1}, Tokens), tokenize(Rest, NewLine, NewColumn, NewScope, [Token | TokensSoFar]); {error, Reason} -> @@ -937,7 +938,7 @@ handle_dot([$. | Rest], Line, Column, DotInfo, Scope, Tokens) -> tokenize(Rest, Line, Column, Scope, TokensSoFar). 
handle_call_identifier(Rest, Line, Column, DotInfo, Length, UnencodedOp, Scope, Tokens) -> - Token = check_call_identifier(Line, Column, UnencodedOp, nil, list_to_atom(UnencodedOp), Rest), + Token = check_call_identifier(Line, Column, UnencodedOp, list_to_atom(UnencodedOp), Rest), TokensSoFar = add_token_with_eol({'.', DotInfo}, Tokens), tokenize(Rest, Line, Column + Length, Scope, [Token | TokensSoFar]). @@ -1324,18 +1325,18 @@ tokenize_alias(Rest, Line, Column, Unencoded, Atom, Length, Ascii, Special, Scop error(Reason, Unencoded ++ Rest, Scope, Tokens); true -> - AliasesToken = {alias, {Line, Column, {Unencoded, nil}}, Atom}, + AliasesToken = {alias, {Line, Column, Unencoded}, Atom}, tokenize(Rest, Line, Column + Length, Scope, [AliasesToken | Tokens]) end. %% Check if it is a call identifier (paren | bracket | do) -check_call_identifier(Line, Column, Unencoded, Delimiter, Atom, [$( | _]) -> - {paren_identifier, {Line, Column, {Unencoded, Delimiter}}, Atom}; -check_call_identifier(Line, Column, Unencoded, Delimiter, Atom, [$[ | _]) -> - {bracket_identifier, {Line, Column, {Unencoded, Delimiter}}, Atom}; -check_call_identifier(Line, Column, Unencoded, Delimiter, Atom, _Rest) -> - {identifier, {Line, Column, {Unencoded, Delimiter}}, Atom}. +check_call_identifier(Line, Column, Unencoded, Atom, [$( | _]) -> + {paren_identifier, {Line, Column, Unencoded}, Atom}; +check_call_identifier(Line, Column, Unencoded, Atom, [$[ | _]) -> + {bracket_identifier, {Line, Column, Unencoded}, Atom}; +check_call_identifier(Line, Column, Unencoded, Atom, _Rest) -> + {identifier, {Line, Column, Unencoded}, Atom}. 
add_token_with_eol({unary_op, _, _} = Left, T) -> [Left | T]; add_token_with_eol(Left, [{eol, _} | T]) -> [Left | T]; diff --git a/lib/elixir/unicode/security.ex b/lib/elixir/unicode/security.ex index 2b2bb3ed117..90886d58565 100644 --- a/lib/elixir/unicode/security.ex +++ b/lib/elixir/unicode/security.ex @@ -40,7 +40,7 @@ defmodule String.Tokenizer.Security do ] defp check_token_for_confusability( - {kind, {_line, _column, {[_ | _] = name, _delimiter}} = info, _}, + {kind, {_line, _column, [_ | _] = name} = info, _}, skeletons ) when kind in @identifiers do @@ -50,7 +50,7 @@ defmodule String.Tokenizer.Security do {_, _, ^name} -> {:ok, skeletons} - {line, _, {previous_name, _delimiter}} when name != previous_name -> + {line, _, previous_name} when name != previous_name -> {:warn, "confusable identifier: '#{name}' looks like '#{previous_name}' on line #{line}, " <> "but they are written using different characters" <> dir_compare(name, previous_name)}