@@ -1023,55 +1023,93 @@ is_unnecessary_quote(_Parts, _Scope) ->
%% Turns the characters of an atom literal into an atom (or into whatever
%% term the configured static atoms encoder produces).
%%
%% Part/Binary/List is either a binary or a list; Line and Column are used
%% only to build the location of error tuples via ?LOC.
%%
%% Clauses are tried in this order:
%%   1. binaries over 255 bytes / lists over 255 elements are rejected;
%%   2. when `static_atoms_encoder` is a function, it is called with the
%%      part as a binary and [{line, Line}, {column, Column}];
%%   3. when `existing_atoms_only` is true, only existing atoms are returned;
%%   4. otherwise the atom is created with binary_to_atom/list_to_atom.
%%
%% Returns {ok, AtomOrTerm} or {error, {Location, Prefix, Token}}.
%% An Elixir UnicodeConversionError raised while converting the part is
%% reported as an " invalid encoding in atom: " error instead of escaping.
%% An encoder result other than {ok, _} or {error, Binary} is not handled
%% and fails with a case_clause error.
unsafe_to_atom(Part, Line, Column, #elixir_tokenizer{}) when
    is_binary(Part) andalso byte_size(Part) > 255;
    is_list(Part) andalso length(Part) > 255 ->
  catch_atom_encoding_error(Line, Column, fun() ->
    PartList = elixir_utils:characters_to_list(Part),
    {error, {?LOC(Line, Column), " atom length must be less than system limit: ", PartList}}
  end);
unsafe_to_atom(Part, Line, Column, #elixir_tokenizer{static_atoms_encoder = StaticAtomsEncoder}) when
    is_function(StaticAtomsEncoder) ->
  %% Only the conversions are guarded; the encoder itself runs outside the
  %% try so that errors raised by user code are not rewritten.
  Converted = catch_atom_encoding_error(Line, Column, fun() ->
    {ok, elixir_utils:characters_to_binary(Part), elixir_utils:characters_to_list(Part)}
  end),

  case Converted of
    {ok, Value, ValueList} ->
      case StaticAtomsEncoder(Value, [{line, Line}, {column, Column}]) of
        {ok, Term} ->
          {ok, Term};
        {error, Reason} when is_binary(Reason) ->
          {error, {?LOC(Line, Column), elixir_utils:characters_to_list(Reason) ++ " : ", ValueList}}
      end;
    {error, _} = EncodingError ->
      EncodingError
  end;
unsafe_to_atom(Binary, Line, Column, #elixir_tokenizer{existing_atoms_only = true}) when is_binary(Binary) ->
  try
    {ok, binary_to_existing_atom(Binary, utf8)}
  catch
    error:badarg ->
      %% badarg means either bad encoding or a missing atom; the conversion
      %% below raises on bad encoding, otherwise the atom does not exist.
      catch_atom_encoding_error(Line, Column, fun() ->
        List = elixir_utils:characters_to_list(Binary),
        {error, {?LOC(Line, Column), " unsafe atom does not exist: ", List}}
      end)
  end;
unsafe_to_atom(Binary, Line, Column, #elixir_tokenizer{}) when is_binary(Binary) ->
  try
    {ok, binary_to_atom(Binary, utf8)}
  catch
    error:badarg ->
      %% If the conversion succeeds it is not an encoding issue, so it is
      %% some other badarg.
      catch_atom_encoding_error(Line, Column, fun() ->
        List = elixir_utils:characters_to_list(Binary),
        {error, {?LOC(Line, Column), " invalid atom: ", List}}
      end)
  end;
unsafe_to_atom(List, Line, Column, #elixir_tokenizer{existing_atoms_only = true}) when is_list(List) ->
  try
    {ok, list_to_existing_atom(List)}
  catch
    error:badarg ->
      %% The conversion is called only for its exception; its result is
      %% discarded and the original list is reported as the token.
      catch_atom_encoding_error(Line, Column, fun() ->
        elixir_utils:characters_to_binary(List),
        {error, {?LOC(Line, Column), " unsafe atom does not exist: ", List}}
      end)
  end;
unsafe_to_atom(List, Line, Column, #elixir_tokenizer{}) when is_list(List) ->
  try
    {ok, list_to_atom(List)}
  catch
    error:badarg ->
      %% Same probing as above: not an encoding issue means some other badarg.
      catch_atom_encoding_error(Line, Column, fun() ->
        elixir_utils:characters_to_binary(List),
        {error, {?LOC(Line, Column), " invalid atom: ", List}}
      end)
  end.

%% Evaluates Fun() and returns its result. If Fun raises an Elixir
%% UnicodeConversionError, returns an " invalid encoding in atom: " error
%% tuple located at Line/Column that carries the exception message.
%% Any other exception propagates unchanged.
catch_atom_encoding_error(Line, Column, Fun) ->
  try
    Fun()
  catch
    error:#{'__struct__' := 'Elixir.UnicodeConversionError', message := Message} ->
      {error, {?LOC(Line, Column), " invalid encoding in atom: ", elixir_utils:characters_to_list(Message)}}
  end.
10761114
10771115collect_modifiers ([H | T ], Buffer ) when ? is_downcase (H ) or ? is_upcase (H ) or ? is_digit (H ) ->
@@ -1095,7 +1133,12 @@ extract_heredoc_with_interpolation(Line, Column, Scope, Interpol, T, H) ->
10951133 {Parts1 , {ShouldWarn , _ }} = lists :mapfoldl (Fun , {false , Line }, Parts0 ),
10961134 Parts2 = extract_heredoc_head (Parts1 ),
10971135 NewScope = maybe_heredoc_warn (ShouldWarn , Column , InterScope , H ),
1098- {ok , NewLine , NewColumn , tokens_to_binary (Parts2 ), Rest , NewScope };
1136+ try
1137+ {ok , NewLine , NewColumn , tokens_to_binary (Parts2 ), Rest , NewScope }
1138+ catch
1139+ error :#{'__struct__' := 'Elixir.UnicodeConversionError' , message := Message } ->
1140+ {error , interpolation_format (Message , " (for heredoc starting at line ~B )" , [Line ], Line , Column , [H , H , H ], [H , H , H ])}
1141+ end ;
10991142
11001143 {error , Reason } ->
11011144 {error , interpolation_format (Reason , " (for heredoc starting at line ~B )" , [Line ], Line , Column , [H , H , H ], [H , H , H ])}
@@ -1166,8 +1209,13 @@ unescape_tokens(Tokens, Line, Column, #elixir_tokenizer{unescape=true}) ->
11661209 {error , Message , Token } ->
11671210 {error , {? LOC (Line , Column ), Message ++ " . Syntax error after: " , Token }}
11681211 end ;
1169- unescape_tokens (Tokens , _Line , _Column , # elixir_tokenizer {unescape = false }) ->
1170- {ok , tokens_to_binary (Tokens )}.
1212+ unescape_tokens (Tokens , Line , Column , # elixir_tokenizer {unescape = false }) ->
1213+ try
1214+ {ok , tokens_to_binary (Tokens )}
1215+ catch
1216+ error :#{'__struct__' := 'Elixir.UnicodeConversionError' , message := Message } ->
1217+ {error , {? LOC (Line , Column ), " invalid encoding in tokens: " , elixir_utils :characters_to_list (Message )}}
1218+ end .
11711219
11721220tokens_to_binary (Tokens ) ->
11731221 [if is_list (Token ) -> elixir_utils :characters_to_binary (Token ); true -> Token end
@@ -1671,7 +1719,14 @@ tokenize_sigil_contents([H | T] = Original, [S | _] = SigilName, Line, Column, S
16711719 case elixir_interpolation :extract (Line , Column + 1 , Scope , ? is_downcase (S ), T , sigil_terminator (H )) of
16721720 {NewLine , NewColumn , Parts , Rest , NewScope } ->
16731721 Indentation = nil ,
1674- add_sigil_token (SigilName , Line , Column , NewLine , NewColumn , tokens_to_binary (Parts ), Rest , NewScope , Tokens , Indentation , <<H >>);
1722+ try
1723+ add_sigil_token (SigilName , Line , Column , NewLine , NewColumn , tokens_to_binary (Parts ), Rest , NewScope , Tokens , Indentation , <<H >>)
1724+ catch
1725+ error :#{'__struct__' := 'Elixir.UnicodeConversionError' , message := Message } ->
1726+ Sigil = [$~ , S , H ],
1727+ Message = " (for sigil ~ts starting at line ~B )" ,
1728+ interpolation_error (Message , [$~ ] ++ SigilName ++ Original , Scope , Tokens , Message , [Sigil , Line ], Line , Column , [H ], [sigil_terminator (H )])
1729+ end ;
16751730
16761731 {error , Reason } ->
16771732 Sigil = [$~ , S , H ],
0 commit comments