Skip to content
Closed
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
39 changes: 28 additions & 11 deletions lib/elixir/src/elixir_interpolation.erl
Original file line number Diff line number Diff line change
Expand Up @@ -5,7 +5,7 @@
% Handle string and string-like interpolations.
-module(elixir_interpolation).
-export([extract/6, unescape_string/1, unescape_string/2,
unescape_tokens/1, unescape_map/1]).
unescape_tokens/1, unescape_map/1, format_error/1]).
-include("elixir.hrl").
-include("elixir_tokenizer.hrl").

Expand Down Expand Up @@ -135,10 +135,16 @@ cursor_complete(Line, Column, Terminators) ->
%% Unescape a series of tokens as returned by extract.

unescape_tokens(Tokens) ->
try [unescape_token(Token, fun unescape_map/1) || Token <- Tokens] of
Unescaped -> {ok, Unescaped}
try
erlang:put(elixir_interpolation_warnings, []),
Unescaped = [unescape_token(Token, fun unescape_map/1) || Token <- Tokens],
Warnings = lists:reverse(erlang:get(elixir_interpolation_warnings)),
erlang:erase(elixir_interpolation_warnings),
{ok, Unescaped, Warnings}
catch
{error, _Reason, _Token} = Error -> Error
{error, _Reason, _Token} = Error ->
erlang:erase(elixir_interpolation_warnings),
Error
end.

unescape_token(Token, Map) when is_list(Token) ->
Expand Down Expand Up @@ -211,31 +217,31 @@ unescape_hex(<<A, B, Rest/binary>>, Map, Acc) when ?is_hex(A), ?is_hex(B) ->
%% TODO: Remove deprecated sequences on v2.0

unescape_hex(<<A, Rest/binary>>, Map, Acc) when ?is_hex(A) ->
io:format(standard_error, "warning: \\xH inside strings/sigils/chars is deprecated, please use \\xHH (byte) or \\uHHHH (code point) instead~n", []),
warn(deprecated_xh),
append_codepoint(Rest, Map, [A], Acc, 16);

unescape_hex(<<${, A, $}, Rest/binary>>, Map, Acc) when ?is_hex(A) ->
io:format(standard_error, "warning: \\x{H*} inside strings/sigils/chars is deprecated, please use \\xHH (byte) or \\uHHHH (code point) instead~n", []),
warn(deprecated_xh_curly),
append_codepoint(Rest, Map, [A], Acc, 16);

unescape_hex(<<${, A, B, $}, Rest/binary>>, Map, Acc) when ?is_hex(A), ?is_hex(B) ->
io:format(standard_error, "warning: \\x{H*} inside strings/sigils/chars is deprecated, please use \\xHH (byte) or \\uHHHH (code point) instead~n", []),
warn(deprecated_xh_curly),
append_codepoint(Rest, Map, [A, B], Acc, 16);

unescape_hex(<<${, A, B, C, $}, Rest/binary>>, Map, Acc) when ?is_hex(A), ?is_hex(B), ?is_hex(C) ->
io:format(standard_error, "warning: \\x{H*} inside strings/sigils/chars is deprecated, please use \\xHH (byte) or \\uHHHH (code point) instead~n", []),
warn(deprecated_xh_curly),
append_codepoint(Rest, Map, [A, B, C], Acc, 16);

unescape_hex(<<${, A, B, C, D, $}, Rest/binary>>, Map, Acc) when ?is_hex(A), ?is_hex(B), ?is_hex(C), ?is_hex(D) ->
io:format(standard_error, "warning: \\x{H*} inside strings/sigils/chars is deprecated, please use \\xHH (byte) or \\uHHHH (code point) instead~n", []),
warn(deprecated_xh_curly),
append_codepoint(Rest, Map, [A, B, C, D], Acc, 16);

unescape_hex(<<${, A, B, C, D, E, $}, Rest/binary>>, Map, Acc) when ?is_hex(A), ?is_hex(B), ?is_hex(C), ?is_hex(D), ?is_hex(E) ->
io:format(standard_error, "warning: \\x{H*} inside strings/sigils/chars is deprecated, please use \\xHH (byte) or \\uHHHH (code point) instead~n", []),
warn(deprecated_xh_curly),
append_codepoint(Rest, Map, [A, B, C, D, E], Acc, 16);

unescape_hex(<<${, A, B, C, D, E, F, $}, Rest/binary>>, Map, Acc) when ?is_hex(A), ?is_hex(B), ?is_hex(C), ?is_hex(D), ?is_hex(E), ?is_hex(F) ->
io:format(standard_error, "warning: \\x{H*} inside strings/sigils/chars is deprecated, please use \\xHH (byte) or \\uHHHH (code point) instead~n", []),
warn(deprecated_xh_curly),
append_codepoint(Rest, Map, [A, B, C, D, E, F], Acc, 16);

unescape_hex(<<_/binary>>, _Map, _Acc) ->
Expand Down Expand Up @@ -292,6 +298,12 @@ unescape_map($t) -> $\t;
unescape_map($v) -> $\v;
unescape_map(E) -> E.

%% Records a deprecation warning tag for the unescape pass running in the
%% current process.
%%
%% Warnings are accumulated (most recent first) under the process dictionary
%% key `elixir_interpolation_warnings`. When that key is not set, the warning
%% is dropped and `ok` is returned. Otherwise `Type` is prepended to the stored
%% list and the result of erlang:put/2 (the previous list) is returned.
warn(Type) ->
    push_warning(erlang:get(elixir_interpolation_warnings), Type).

%% Prepends `Type` to the collected warnings, unless no collector is active.
push_warning(undefined, _Type) ->
    ok;
push_warning(Collected, Type) ->
    erlang:put(elixir_interpolation_warnings, [Type | Collected]).

% Extract Helpers

finish_extraction(Remaining, Buffer, Output, Line, Column, Scope) ->
Expand All @@ -310,3 +322,8 @@ build_interpol(Line, Column, EndLine, EndColumn, Buffer, Output) ->

%% Returns `Scope` with a new warning pushed onto the front of its `warnings`
%% field. Each entry has the shape `{{Line, Column}, Msg}`.
prepend_warning(Line, Column, Msg, #elixir_tokenizer{warnings = Existing} = Scope) ->
    Entry = {{Line, Column}, Msg},
    Scope#elixir_tokenizer{warnings = [Entry | Existing]}.

%% Translates a deprecation warning tag into its human-readable message.
%% Only `deprecated_xh` and `deprecated_xh_curly` are accepted; any other
%% tag fails with function_clause.
format_error(deprecated_xh) ->
    deprecated_hex_message("\\xH");
format_error(deprecated_xh_curly) ->
    deprecated_hex_message("\\x{H*}").

%% Builds the deprecation text for the given escape-sequence spelling.
deprecated_hex_message(Sequence) ->
    Sequence ++ " inside strings/sigils/chars is deprecated, please use \\xHH (byte) or \\uHHHH (code point) instead".
55 changes: 31 additions & 24 deletions lib/elixir/src/elixir_tokenizer.erl
Original file line number Diff line number Diff line change
Expand Up @@ -517,23 +517,23 @@ tokenize([$:, H | T] = Original, Line, Column, BaseScope, Tokens) when ?is_quote
end,

case unescape_tokens(Parts, Line, Column, NewScope) of
{ok, [Part]} when is_binary(Part) ->
case unsafe_to_atom(Part, Line, Column, Scope) of
{ok, [Part], Scope1} when is_binary(Part) ->
case unsafe_to_atom(Part, Line, Column, Scope1) of
{ok, Atom} ->
Token = {atom_quoted, {Line, Column, H}, Atom},
tokenize(Rest, NewLine, NewColumn, NewScope, [Token | Tokens]);
tokenize(Rest, NewLine, NewColumn, Scope1, [Token | Tokens]);

{error, Reason} ->
error(Reason, Rest, NewScope, Tokens)
end;

{ok, Unescaped} ->
{ok, Unescaped, Scope1} ->
Key = case Scope#elixir_tokenizer.existing_atoms_only of
true -> atom_safe;
false -> atom_unsafe
end,
Token = {Key, {Line, Column, H}, Unescaped},
tokenize(Rest, NewLine, NewColumn, NewScope, [Token | Tokens]);
tokenize(Rest, NewLine, NewColumn, Scope1, [Token | Tokens]);

{error, Reason} ->
error(Reason, Rest, NewScope, Tokens)
Expand Down Expand Up @@ -768,9 +768,9 @@ handle_heredocs(T, Line, Column, H, Scope, Tokens) ->
case extract_heredoc_with_interpolation(Line, Column, Scope, true, T, H) of
{ok, NewLine, NewColumn, Parts, Rest, NewScope} ->
case unescape_tokens(Parts, Line, Column, NewScope) of
{ok, Unescaped} ->
{ok, Unescaped, Scope1} ->
Token = {heredoc_type(H), {Line, Column, nil}, NewColumn - 4, Unescaped},
tokenize(Rest, NewLine, NewColumn, NewScope, [Token | Tokens]);
tokenize(Rest, NewLine, NewColumn, Scope1, [Token | Tokens]);

{error, Reason} ->
error(Reason, Rest, Scope, Tokens)
Expand Down Expand Up @@ -807,22 +807,22 @@ handle_strings(T, Line, Column, H, Scope, Tokens) ->
end,

case unescape_tokens(Parts, Line, Column, NewScope) of
{ok, [Part]} when is_binary(Part) ->
case unsafe_to_atom(Part, Line, Column - 1, Scope) of
{ok, [Part], Scope1} when is_binary(Part) ->
case unsafe_to_atom(Part, Line, Column - 1, Scope1) of
{ok, Atom} ->
Token = {kw_identifier, {Line, Column - 1, H}, Atom},
tokenize(Rest, NewLine, NewColumn + 1, NewScope, [Token | Tokens]);
tokenize(Rest, NewLine, NewColumn + 1, Scope1, [Token | Tokens]);
{error, Reason} ->
error(Reason, Rest, NewScope, Tokens)
error(Reason, Rest, Scope1, Tokens)
end;

{ok, Unescaped} ->
{ok, Unescaped, Scope1} ->
Key = case Scope#elixir_tokenizer.existing_atoms_only of
true -> kw_identifier_safe;
false -> kw_identifier_unsafe
end,
Token = {Key, {Line, Column - 1, H}, Unescaped},
tokenize(Rest, NewLine, NewColumn + 1, NewScope, [Token | Tokens]);
tokenize(Rest, NewLine, NewColumn + 1, Scope1, [Token | Tokens]);

{error, Reason} ->
error(Reason, Rest, NewScope, Tokens)
Expand All @@ -843,9 +843,9 @@ handle_strings(T, Line, Column, H, Scope, Tokens) ->
end,

case unescape_tokens(Parts, Line, Column, NewScope) of
{ok, Unescaped} ->
{ok, Unescaped, Scope1} ->
Token = {string_type(H), {Line, Column - 1, nil}, Unescaped},
tokenize(Rest, NewLine, NewColumn, NewScope, [Token | Tokens]);
tokenize(Rest, NewLine, NewColumn, Scope1, [Token | Tokens]);

{error, Reason} ->
error(Reason, Rest, NewScope, Tokens)
Expand Down Expand Up @@ -949,15 +949,15 @@ handle_dot([$., H | T] = Original, Line, Column, DotInfo, BaseScope, Tokens) whe
end,

case unescape_tokens([Part], Line, Column, NewScope) of
{ok, [UnescapedPart]} ->
case unsafe_to_atom(UnescapedPart, Line, Column, NewScope) of
{ok, [UnescapedPart], NewScope1} ->
case unsafe_to_atom(UnescapedPart, Line, Column, NewScope1) of
{ok, Atom} ->
Token = check_call_identifier(Line, Column, H, Atom, Rest),
TokensSoFar = add_token_with_eol({'.', DotInfo}, Tokens),
tokenize(Rest, NewLine, NewColumn, NewScope, [Token | TokensSoFar]);
tokenize(Rest, NewLine, NewColumn, NewScope1, [Token | TokensSoFar]);

{error, Reason} ->
error(Reason, Original, NewScope, Tokens)
error(Reason, Original, NewScope1, Tokens)
end;

{error, Reason} ->
Expand Down Expand Up @@ -1158,16 +1158,23 @@ maybe_heredoc_warn(Line, Column, Scope, Marker) ->

extract_heredoc_head([[$\n|H]|T]) -> [H|T].

unescape_tokens(Tokens, Line, Column, #elixir_tokenizer{unescape=true}) ->
unescape_tokens(Tokens, Line, Column, Scope = #elixir_tokenizer{unescape=true}) ->
case elixir_interpolation:unescape_tokens(Tokens) of
{ok, Result} ->
{ok, Result};
{ok, Result, Warnings} ->
NewScope = lists:foldl(
fun(Warn, Acc) ->
prepend_warning(Line, Column, elixir_interpolation:format_error(Warn), Acc)
end,
Scope,
Warnings
),
{ok, Result, NewScope};

{error, Message, Token} ->
{error, {?LOC(Line, Column), Message ++ ". Syntax error after: ", Token}}
end;
unescape_tokens(Tokens, _Line, _Column, #elixir_tokenizer{unescape=false}) ->
{ok, tokens_to_binary(Tokens)}.
unescape_tokens(Tokens, _Line, _Column, Scope = #elixir_tokenizer{unescape=false}) ->
{ok, tokens_to_binary(Tokens), Scope}.

tokens_to_binary(Tokens) ->
[if is_list(Token) -> elixir_utils:characters_to_binary(Token); true -> Token end
Expand Down