diff --git a/lib/elixir/src/elixir_interpolation.erl b/lib/elixir/src/elixir_interpolation.erl index 16670d5f3e0..cd98e84ec7d 100644 --- a/lib/elixir/src/elixir_interpolation.erl +++ b/lib/elixir/src/elixir_interpolation.erl @@ -5,7 +5,7 @@ % Handle string and string-like interpolations. -module(elixir_interpolation). -export([extract/6, unescape_string/1, unescape_string/2, -unescape_tokens/1, unescape_map/1]). +unescape_tokens/1, unescape_map/1, format_error/1]). -include("elixir.hrl"). -include("elixir_tokenizer.hrl"). @@ -135,10 +135,16 @@ cursor_complete(Line, Column, Terminators) -> %% Unescape a series of tokens as returned by extract. unescape_tokens(Tokens) -> - try [unescape_token(Token, fun unescape_map/1) || Token <- Tokens] of - Unescaped -> {ok, Unescaped} + try + erlang:put(elixir_interpolation_warnings, []), + Unescaped = [unescape_token(Token, fun unescape_map/1) || Token <- Tokens], + Warnings = lists:reverse(erlang:get(elixir_interpolation_warnings)), + erlang:erase(elixir_interpolation_warnings), + {ok, Unescaped, Warnings} catch - {error, _Reason, _Token} = Error -> Error + {error, _Reason, _Token} = Error -> + erlang:erase(elixir_interpolation_warnings), + Error end. unescape_token(Token, Map) when is_list(Token) -> @@ -211,31 +217,31 @@ unescape_hex(<>, Map, Acc) when ?is_hex(A), ?is_hex(B) -> %% TODO: Remove deprecated sequences on v2.0 unescape_hex(<>, Map, Acc) when ?is_hex(A) -> - io:format(standard_error, "warning: \\xH inside strings/sigils/chars is deprecated, please use \\xHH (byte) or \\uHHHH (code point) instead~n", []), + warn(deprecated_xh), append_codepoint(Rest, Map, [A], Acc, 16); unescape_hex(<<${, A, $}, Rest/binary>>, Map, Acc) when ?is_hex(A) -> - io:format(standard_error, "warning: \\x{H*} inside strings/sigils/chars is deprecated, please use \\xHH (byte) or \\uHHHH (code point) instead~n", []), + warn(deprecated_xh_curly), append_codepoint(Rest, Map, [A], Acc, 16); unescape_hex(<<${, A, B, $}, Rest/binary>>, Map, Acc) when ?is_hex(A), ?is_hex(B) -> - io:format(standard_error, "warning: \\x{H*} inside strings/sigils/chars is deprecated, please use \\xHH (byte) or \\uHHHH (code point) instead~n", []), + warn(deprecated_xh_curly), append_codepoint(Rest, Map, [A, B], Acc, 16); unescape_hex(<<${, A, B, C, $}, Rest/binary>>, Map, Acc) when ?is_hex(A), ?is_hex(B), ?is_hex(C) -> - io:format(standard_error, "warning: \\x{H*} inside strings/sigils/chars is deprecated, please use \\xHH (byte) or \\uHHHH (code point) instead~n", []), + warn(deprecated_xh_curly), append_codepoint(Rest, Map, [A, B, C], Acc, 16); unescape_hex(<<${, A, B, C, D, $}, Rest/binary>>, Map, Acc) when ?is_hex(A), ?is_hex(B), ?is_hex(C), ?is_hex(D) -> - io:format(standard_error, "warning: \\x{H*} inside strings/sigils/chars is deprecated, please use \\xHH (byte) or \\uHHHH (code point) instead~n", []), + warn(deprecated_xh_curly), append_codepoint(Rest, Map, [A, B, C, D], Acc, 16); unescape_hex(<<${, A, B, C, D, E, $}, Rest/binary>>, Map, Acc) when ?is_hex(A), ?is_hex(B), ?is_hex(C), ?is_hex(D), ?is_hex(E) -> - io:format(standard_error, "warning: \\x{H*} inside strings/sigils/chars is deprecated, please use \\xHH (byte) or \\uHHHH (code point) instead~n", []), + warn(deprecated_xh_curly), append_codepoint(Rest, Map, [A, B, C, D, E], Acc, 16); unescape_hex(<<${, A, B, C, D, E, F, $}, Rest/binary>>, Map, Acc) when ?is_hex(A), ?is_hex(B), ?is_hex(C), ?is_hex(D), ?is_hex(E), ?is_hex(F) -> - io:format(standard_error, "warning: \\x{H*} inside strings/sigils/chars is deprecated, please use \\xHH (byte) or \\uHHHH (code point) instead~n", []), + warn(deprecated_xh_curly), append_codepoint(Rest, Map, [A, B, C, D, E, F], Acc, 16); unescape_hex(<<_/binary>>, _Map, _Acc) -> @@ -292,6 +298,12 @@ unescape_map($t) -> $\t; unescape_map($v) -> $\v; unescape_map(E) -> E. +warn(Type) -> + case erlang:get(elixir_interpolation_warnings) of + undefined -> ok; + List -> erlang:put(elixir_interpolation_warnings, [Type | List]) + end. + % Extract Helpers finish_extraction(Remaining, Buffer, Output, Line, Column, Scope) -> @@ -310,3 +322,8 @@ build_interpol(Line, Column, EndLine, EndColumn, Buffer, Output) -> prepend_warning(Line, Column, Msg, #elixir_tokenizer{warnings=Warnings} = Scope) -> Scope#elixir_tokenizer{warnings = [{{Line, Column}, Msg} | Warnings]}. + +format_error(deprecated_xh) -> + "\\xH inside strings/sigils/chars is deprecated, please use \\xHH (byte) or \\uHHHH (code point) instead"; +format_error(deprecated_xh_curly) -> + "\\x{H*} inside strings/sigils/chars is deprecated, please use \\xHH (byte) or \\uHHHH (code point) instead". diff --git a/lib/elixir/src/elixir_tokenizer.erl b/lib/elixir/src/elixir_tokenizer.erl index 578bc340aac..e739c877772 100644 --- a/lib/elixir/src/elixir_tokenizer.erl +++ b/lib/elixir/src/elixir_tokenizer.erl @@ -517,23 +517,23 @@ tokenize([$:, H | T] = Original, Line, Column, BaseScope, Tokens) when ?is_quote end, case unescape_tokens(Parts, Line, Column, NewScope) of - {ok, [Part]} when is_binary(Part) -> - case unsafe_to_atom(Part, Line, Column, Scope) of + {ok, [Part], Scope1} when is_binary(Part) -> + case unsafe_to_atom(Part, Line, Column, Scope1) of {ok, Atom} -> Token = {atom_quoted, {Line, Column, H}, Atom}, - tokenize(Rest, NewLine, NewColumn, NewScope, [Token | Tokens]); + tokenize(Rest, NewLine, NewColumn, Scope1, [Token | Tokens]); {error, Reason} -> error(Reason, Rest, NewScope, Tokens) end; - {ok, Unescaped} -> + {ok, Unescaped, Scope1} -> Key = case Scope#elixir_tokenizer.existing_atoms_only of true -> atom_safe; false -> atom_unsafe end, Token = {Key, {Line, Column, H}, Unescaped}, - tokenize(Rest, NewLine, NewColumn, NewScope, [Token | Tokens]); + tokenize(Rest, NewLine, NewColumn, Scope1, [Token | Tokens]); {error, Reason} -> error(Reason, Rest, NewScope, Tokens) @@ -768,9 +768,9 @@ handle_heredocs(T, Line, Column, H, Scope, Tokens) -> case extract_heredoc_with_interpolation(Line, Column, Scope, true, T, H) of {ok, NewLine, NewColumn, Parts, Rest, NewScope} -> case unescape_tokens(Parts, Line, Column, NewScope) of - {ok, Unescaped} -> + {ok, Unescaped, Scope1} -> Token = {heredoc_type(H), {Line, Column, nil}, NewColumn - 4, Unescaped}, - tokenize(Rest, NewLine, NewColumn, NewScope, [Token | Tokens]); + tokenize(Rest, NewLine, NewColumn, Scope1, [Token | Tokens]); {error, Reason} -> error(Reason, Rest, Scope, Tokens) @@ -807,22 +807,22 @@ handle_strings(T, Line, Column, H, Scope, Tokens) -> end, case unescape_tokens(Parts, Line, Column, NewScope) of - {ok, [Part]} when is_binary(Part) -> - case unsafe_to_atom(Part, Line, Column - 1, Scope) of + {ok, [Part], Scope1} when is_binary(Part) -> + case unsafe_to_atom(Part, Line, Column - 1, Scope1) of {ok, Atom} -> Token = {kw_identifier, {Line, Column - 1, H}, Atom}, - tokenize(Rest, NewLine, NewColumn + 1, NewScope, [Token | Tokens]); + tokenize(Rest, NewLine, NewColumn + 1, Scope1, [Token | Tokens]); {error, Reason} -> - error(Reason, Rest, NewScope, Tokens) + error(Reason, Rest, Scope1, Tokens) end; - {ok, Unescaped} -> + {ok, Unescaped, Scope1} -> Key = case Scope#elixir_tokenizer.existing_atoms_only of true -> kw_identifier_safe; false -> kw_identifier_unsafe end, Token = {Key, {Line, Column - 1, H}, Unescaped}, - tokenize(Rest, NewLine, NewColumn + 1, NewScope, [Token | Tokens]); + tokenize(Rest, NewLine, NewColumn + 1, Scope1, [Token | Tokens]); {error, Reason} -> error(Reason, Rest, NewScope, Tokens) @@ -843,9 +843,9 @@ handle_strings(T, Line, Column, H, Scope, Tokens) -> end, case unescape_tokens(Parts, Line, Column, NewScope) of - {ok, Unescaped} -> + {ok, Unescaped, Scope1} -> Token = {string_type(H), {Line, Column - 1, nil}, Unescaped}, - tokenize(Rest, NewLine, NewColumn, NewScope, [Token | Tokens]); + tokenize(Rest, NewLine, NewColumn, Scope1, [Token | Tokens]); {error, Reason} -> error(Reason, Rest, NewScope, Tokens) @@ -949,15 +949,15 @@ handle_dot([$., H | T] = Original, Line, Column, DotInfo, BaseScope, Tokens) whe end, case unescape_tokens([Part], Line, Column, NewScope) of - {ok, [UnescapedPart]} -> - case unsafe_to_atom(UnescapedPart, Line, Column, NewScope) of + {ok, [UnescapedPart], NewScope1} -> + case unsafe_to_atom(UnescapedPart, Line, Column, NewScope1) of {ok, Atom} -> Token = check_call_identifier(Line, Column, H, Atom, Rest), TokensSoFar = add_token_with_eol({'.', DotInfo}, Tokens), - tokenize(Rest, NewLine, NewColumn, NewScope, [Token | TokensSoFar]); + tokenize(Rest, NewLine, NewColumn, NewScope1, [Token | TokensSoFar]); {error, Reason} -> - error(Reason, Original, NewScope, Tokens) + error(Reason, Original, NewScope1, Tokens) end; {error, Reason} -> @@ -1158,16 +1158,23 @@ maybe_heredoc_warn(Line, Column, Scope, Marker) -> extract_heredoc_head([[$\n|H]|T]) -> [H|T]. -unescape_tokens(Tokens, Line, Column, #elixir_tokenizer{unescape=true}) -> +unescape_tokens(Tokens, Line, Column, Scope = #elixir_tokenizer{unescape=true}) -> case elixir_interpolation:unescape_tokens(Tokens) of - {ok, Result} -> - {ok, Result}; + {ok, Result, Warnings} -> + NewScope = lists:foldl( + fun(Warn, Acc) -> + prepend_warning(Line, Column, elixir_interpolation:format_error(Warn), Acc) + end, + Scope, + Warnings + ), + {ok, Result, NewScope}; {error, Message, Token} -> {error, {?LOC(Line, Column), Message ++ ". Syntax error after: ", Token}} end; -unescape_tokens(Tokens, _Line, _Column, #elixir_tokenizer{unescape=false}) -> - {ok, tokens_to_binary(Tokens)}. +unescape_tokens(Tokens, _Line, _Column, Scope = #elixir_tokenizer{unescape=false}) -> + {ok, tokens_to_binary(Tokens), Scope}. tokens_to_binary(Tokens) -> [if is_list(Token) -> elixir_utils:characters_to_binary(Token); true -> Token end