@@ -353,11 +353,11 @@ tokenize("=>" ++ Rest, Line, Column, Scope, Tokens) ->
353353 tokenize (Rest , Line , Column + 2 , Scope , add_token_with_eol (Token , Tokens ));
354354
355355tokenize (" ..//" ++ Rest = String , Line , Column , Scope , Tokens ) ->
356- case strip_horizontal_space (Rest , 0 ) of
357- {[$/ | _ ] = Remaining , Extra } ->
356+ case strip_horizontal_space (Rest , Line , Column + 4 , Scope ) of
357+ {[$/ | _ ] = Remaining , NewLine , NewColumn } ->
358358 Token = {identifier , {Line , Column , nil }, '..//' },
359- tokenize (Remaining , Line , Column + 4 + Extra , Scope , [Token | Tokens ]);
360- {_ , _ } ->
359+ tokenize (Remaining , NewLine , NewColumn , Scope , [Token | Tokens ]);
360+ {_ , _ , _ } ->
361361 unexpected_token (String , Line , Column , Scope , Tokens )
362362 end ;
363363
@@ -464,17 +464,17 @@ tokenize([T1, T2 | Rest], Line, Column, Scope, Tokens) when ?stab_op(T1, T2) ->
464464
465465tokenize ([$& | Rest ], Line , Column , Scope , Tokens ) ->
466466 Kind =
467- case strip_horizontal_space (Rest , 0 ) of
468- {[Int | _ ], 0 } when ? is_digit (Int ) ->
467+ case strip_horizontal_space (Rest , Line , 0 , Scope ) of
468+ {[Int | _ ], Line , 0 } when ? is_digit (Int ) ->
469469 capture_int ;
470470
471- {[$/ | NewRest ], _ } ->
472- case strip_horizontal_space (NewRest , 0 ) of
473- {[$/ | _ ], _ } -> capture_op ;
474- {_ , _ } -> identifier
471+ {[$/ | NewRest ], _ , _ } ->
472+ case strip_horizontal_space (NewRest , Line , 0 , Scope ) of
473+ {[$/ | _ ], _ , _ } -> capture_op ;
474+ {_ , _ , _ } -> identifier
475475 end ;
476476
477- {_ , _ } ->
477+ {_ , _ , _ } ->
478478 capture_op
479479 end ,
480480
@@ -612,8 +612,8 @@ tokenize([H | T], Line, Column, Scope, Tokens) when ?is_digit(H) ->
612612% Spaces
613613
614614tokenize ([T | Rest ], Line , Column , Scope , Tokens ) when ? is_horizontal_space (T ) ->
615- {Remaining , Stripped } = strip_horizontal_space (Rest , 0 ),
616- handle_space_sensitive_tokens (Remaining , Line , Column + 1 + Stripped , Scope , Tokens );
615+ {Remaining , NewLine , NewColumn } = strip_horizontal_space (Rest , Line , Column + 1 , Scope ),
616+ handle_space_sensitive_tokens (Remaining , NewLine , NewColumn , Scope , Tokens );
617617
618618% End of line
619619
@@ -735,32 +735,39 @@ unexpected_token([T | Rest], Line, Column, Scope, Tokens) ->
735735 error ({? LOC (Line , Column ), " unexpected token: " , Message }, Rest , Scope , Tokens ).
736736
737737tokenize_eol (Rest , Line , Scope , Tokens ) ->
738- {StrippedRest , Column } = strip_horizontal_space (Rest , Scope # elixir_tokenizer .column ),
739- IndentedScope = Scope # elixir_tokenizer {indentation = Column - 1 },
740- tokenize (StrippedRest , Line + 1 , Column , IndentedScope , Tokens ).
741-
742- strip_horizontal_space ([H | T ], Counter ) when ? is_horizontal_space (H ) ->
743- strip_horizontal_space (T , Counter + 1 );
744- strip_horizontal_space (T , Counter ) ->
745- {T , Counter }.
738+ {StrippedRest , NewLine , NewColumn } =
739+ strip_horizontal_space (Rest , Line + 1 , Scope # elixir_tokenizer .column , Scope ),
740+ IndentedScope = Scope # elixir_tokenizer {indentation = NewColumn - 1 },
741+ tokenize (StrippedRest , NewLine , NewColumn , IndentedScope , Tokens ).
742+
743+ strip_horizontal_space ([H | T ], Line , Counter , Scope ) when ? is_horizontal_space (H ) ->
744+ strip_horizontal_space (T , Line , Counter + 1 , Scope );
745+ % % \\ at the end of lines is treated as horizontal whitespace
746+ % % except at the very end of the buffer, which we treat as incomplete
747+ strip_horizontal_space (" \\\n " ++ T , Line , _Counter , Scope ) when T /= [] ->
748+ strip_horizontal_space (T , Line + 1 , Scope # elixir_tokenizer .column , Scope );
749+ strip_horizontal_space (" \\\r\n " ++ T , Line , _Counter , Scope ) when T /= [] ->
750+ strip_horizontal_space (T , Line + 1 , Scope # elixir_tokenizer .column , Scope );
751+ strip_horizontal_space (T , Line , Counter , _Scope ) ->
752+ {T , Line , Counter }.
746753
747754tokenize_dot (T , Line , Column , DotInfo , Scope , Tokens ) ->
748- case strip_horizontal_space (T , 0 ) of
749- {[$# | R ], _ } ->
755+ case strip_horizontal_space (T , Line , Column , Scope ) of
756+ {[$# | R ], NewLine , NewColumn } ->
750757 case tokenize_comment (R , [$# ]) of
751758 {error , Char , Reason } ->
752- error_comment (Char , Reason , [$# | R ], Line , Column , Scope , Tokens );
759+ error_comment (Char , Reason , [$# | R ], NewLine , NewColumn , Scope , Tokens );
753760
754761 {Rest , Comment } ->
755- preserve_comments (Line , Column , Tokens , Comment , Rest , Scope ),
756- tokenize_dot (Rest , Line , Scope # elixir_tokenizer .column , DotInfo , Scope , Tokens )
762+ preserve_comments (NewLine , NewColumn , Tokens , Comment , Rest , Scope ),
763+ tokenize_dot (Rest , NewLine , Scope # elixir_tokenizer .column , DotInfo , Scope , Tokens )
757764 end ;
758- {" \r\n " ++ Rest , _ } ->
759- tokenize_dot (Rest , Line + 1 , Scope # elixir_tokenizer .column , DotInfo , Scope , Tokens );
760- {" \n " ++ Rest , _ } ->
761- tokenize_dot (Rest , Line + 1 , Scope # elixir_tokenizer .column , DotInfo , Scope , Tokens );
762- {Rest , Length } ->
763- handle_dot ([$. | Rest ], Line , Column + Length , DotInfo , Scope , Tokens )
765+ {" \r\n " ++ Rest , NewLine , _NewColumn } ->
766+ tokenize_dot (Rest , NewLine + 1 , Scope # elixir_tokenizer .column , DotInfo , Scope , Tokens );
767+ {" \n " ++ Rest , NewLine , _NewColumn } ->
768+ tokenize_dot (Rest , NewLine + 1 , Scope # elixir_tokenizer .column , DotInfo , Scope , Tokens );
769+ {Rest , NewLine , NewColumn } ->
770+ handle_dot ([$. | Rest ], NewLine , NewColumn , DotInfo , Scope , Tokens )
764771 end .
765772
766773handle_char (0 ) -> {" \\ 0" , " null byte" };
@@ -871,25 +878,25 @@ handle_unary_op([$: | Rest], Line, Column, _Kind, Length, Op, Scope, Tokens) whe
871878 tokenize (Rest , Line , Column + Length + 1 , Scope , [Token | Tokens ]);
872879
873880handle_unary_op (Rest , Line , Column , Kind , Length , Op , Scope , Tokens ) ->
874- case strip_horizontal_space (Rest , 0 ) of
875- {[$/ | _ ] = Remaining , Extra } ->
881+ case strip_horizontal_space (Rest , Line , Column + Length , Scope ) of
882+ {[$/ | _ ] = Remaining , NewLine , NewColumn } ->
876883 Token = {identifier , {Line , Column , nil }, Op },
877- tokenize (Remaining , Line , Column + Length + Extra , Scope , [Token | Tokens ]);
878- {Remaining , Extra } ->
884+ tokenize (Remaining , NewLine , NewColumn , Scope , [Token | Tokens ]);
885+ {Remaining , NewLine , NewColumn } ->
879886 Token = {Kind , {Line , Column , nil }, Op },
880- tokenize (Remaining , Line , Column + Length + Extra , Scope , [Token | Tokens ])
887+ tokenize (Remaining , NewLine , NewColumn , Scope , [Token | Tokens ])
881888 end .
882889
883890handle_op ([$: | Rest ], Line , Column , _Kind , Length , Op , Scope , Tokens ) when ? is_space (hd (Rest )) ->
884891 Token = {kw_identifier , {Line , Column , nil }, Op },
885892 tokenize (Rest , Line , Column + Length + 1 , Scope , [Token | Tokens ]);
886893
887894handle_op (Rest , Line , Column , Kind , Length , Op , Scope , Tokens ) ->
888- case strip_horizontal_space (Rest , 0 ) of
889- {[$/ | _ ] = Remaining , Extra } ->
895+ case strip_horizontal_space (Rest , Line , Column + Length , Scope ) of
896+ {[$/ | _ ] = Remaining , NewLine , NewColumn } ->
890897 Token = {identifier , {Line , Column , nil }, Op },
891- tokenize (Remaining , Line , Column + Length + Extra , Scope , [Token | Tokens ]);
892- {Remaining , Extra } ->
898+ tokenize (Remaining , NewLine , NewColumn , Scope , [Token | Tokens ]);
899+ {Remaining , NewLine , NewColumn } ->
893900 NewScope =
894901 % % TODO: Remove these deprecations on Elixir v2.0
895902 case Op of
@@ -910,7 +917,7 @@ handle_op(Rest, Line, Column, Kind, Length, Op, Scope, Tokens) ->
910917 end ,
911918
912919 Token = {Kind , {Line , Column , previous_was_eol (Tokens )}, Op },
913- tokenize (Remaining , Line , Column + Length + Extra , NewScope , add_token_with_eol (Token , Tokens ))
920+ tokenize (Remaining , NewLine , NewColumn , NewScope , add_token_with_eol (Token , Tokens ))
914921 end .
915922
916923% ## Three Token Operators
@@ -996,12 +1003,21 @@ handle_call_identifier(Rest, Line, Column, DotInfo, Length, UnencodedOp, Scope,
9961003
9971004% ## Ambiguous unary/binary operators tokens
9981005% Keywords are not ambiguous operators
999- handle_space_sensitive_tokens ([Sign , $: , Space | _ ] = String , Line , Column , Scope , Tokens ) when ? dual_op (Sign ), ? is_space (Space ) ->
1006+ handle_space_sensitive_tokens ([Sign , $: , Space | _ ] = String , Line , Column , Scope , Tokens ) when
1007+ ? dual_op (Sign ), ? is_space (Space ) ->
10001008 tokenize (String , Line , Column , Scope , Tokens );
10011009
10021010% But everything else, except other operators, are
10031011handle_space_sensitive_tokens ([Sign , NotMarker | T ], Line , Column , Scope , [{identifier , _ , _ } = H | Tokens ]) when
1004- ? dual_op (Sign ), not (? is_space (NotMarker )), NotMarker =/= Sign , NotMarker =/= $/ , NotMarker =/= $> ->
1012+ ? dual_op (Sign ), not (? is_space (NotMarker )),
1013+ % % Do not match ++ or --
1014+ NotMarker =/= Sign ,
1015+ % % Do not match +/2 or -/2
1016+ NotMarker =/= $/ ,
1017+ % % Do not match ->
1018+ NotMarker =/= $> ,
1019+ % % Do not match +\\n or -\\n (it should be treated as if a space is there)
1020+ NotMarker =/= $\\ ->
10051021 Rest = [NotMarker | T ],
10061022 DualOpToken = {dual_op , {Line , Column , nil }, list_to_atom ([Sign ])},
10071023 tokenize (Rest , Line , Column + 1 , Scope , [DualOpToken , setelement (1 , H , op_identifier ) | Tokens ]);
@@ -1664,8 +1680,8 @@ tokenize_keyword(block, Rest, Line, Column, Atom, Length, Scope, Tokens) ->
16641680
16651681tokenize_keyword (Kind , Rest , Line , Column , Atom , Length , Scope , Tokens ) ->
16661682 NewTokens =
1667- case strip_horizontal_space (Rest , 0 ) of
1668- {[$/ | _ ], _ } ->
1683+ case strip_horizontal_space (Rest , Line , Column , Scope ) of
1684+ {[$/ | _ ], _ , _ } ->
16691685 [{identifier , {Line , Column , nil }, Atom } | Tokens ];
16701686
16711687 _ ->
0 commit comments