Skip to content

Commit 48b8efe

Browse files
author
José Valim
committed
Ensure proper tokenization of the ... identifier
1 parent 5d34fcd commit 48b8efe

File tree

2 files changed

+72
-60
lines changed

2 files changed

+72
-60
lines changed

lib/elixir/src/elixir_tokenizer.erl

Lines changed: 67 additions & 59 deletions
Original file line number | Diff line number | Diff line change
@@ -208,56 +208,6 @@ tokenize([$?,$\\,H|T], Line, Scope, Tokens) ->
208208
tokenize([$?,Char|T], Line, Scope, Tokens) ->
209209
tokenize(T, Line, Scope, [{ number, Line, Char }|Tokens]);
210210

211-
% Dot identifier/operators
212-
tokenize("..." ++ Rest, Line, Scope, Tokens) ->
213-
Token = check_call_identifier(identifier, Line, '...', Rest),
214-
tokenize(Rest, Line, Scope, [Token|Tokens]);
215-
216-
tokenize([$.,T|Tail], Line, Scope, Tokens) when ?is_space(T) ->
217-
case [T|Tail] of
218-
[$\r,$\n|Rest] -> tokenize([$.|Rest], Line + 1, Scope, Tokens);
219-
[$\n|Rest] -> tokenize([$.|Rest], Line + 1, Scope, Tokens);
220-
[_|Rest] -> tokenize([$.|Rest], Line, Scope, Tokens)
221-
end;
222-
223-
% ## Three Token Operators
224-
tokenize([$.,T1,T2,T3|Rest], Line, Scope, Tokens) when
225-
?unary_op3(T1, T2, T3); ?comp_op3(T1, T2, T3); ?and_op3(T1, T2, T3); ?or_op3(T1, T2, T3);
226-
?arrow_op3(T1, T2, T3); ?exp_op3(T1, T2, T3) ->
227-
handle_call_identifier(Rest, Line, list_to_atom([T1, T2, T3]), Scope, Tokens);
228-
229-
% ## Two Token Operators
230-
tokenize([$.,T1,T2|Rest], Line, Scope, Tokens) when
231-
?comp_op2(T1, T2); ?and_op(T1, T2); ?or_op(T1, T2); ?arrow_op(T1, T2);
232-
?in_match_op(T1, T2); ?two_op(T1, T2); ?stab_op(T1, T2) ->
233-
handle_call_identifier(Rest, Line, list_to_atom([T1, T2]), Scope, Tokens);
234-
235-
% ## Single Token Operators
236-
tokenize([$.,T|Rest], Line, Scope, Tokens) when
237-
?at_op(T); ?unary_op(T); ?dual_op(T); ?mult_op(T); ?comp_op(T);
238-
?match_op(T); ?pipe_op(T) ->
239-
handle_call_identifier(Rest, Line, list_to_atom([T]), Scope, Tokens);
240-
241-
% Dot call
242-
243-
% ## Exception for .( as it needs to be treated specially in the parser
244-
tokenize([$.,$(|Rest], Line, Scope, Tokens) ->
245-
tokenize([$(|Rest], Line, Scope, add_token_with_nl({ dot_call_op, Line, '.' }, Tokens));
246-
247-
tokenize([$.,H|T] = Original, Line, Scope, Tokens) when ?is_quote(H) ->
248-
case elixir_interpolation:extract(Line, Scope, true, T, H) of
249-
{ NewLine, [Part], Rest } when is_binary(Part) ->
250-
case unsafe_to_atom(Part, Line, Scope) of
251-
{ ok, Atom } ->
252-
Token = check_call_identifier(identifier, Line, Atom, Rest),
253-
tokenize(Rest, NewLine, Scope, [Token|add_token_with_nl({ '.', Line }, Tokens)]);
254-
{ error, Reason } ->
255-
{ error, Reason, Original, Tokens }
256-
end;
257-
{ error, Reason } ->
258-
interpolation_error(Reason, Original, Tokens, " (for function name starting at line ~B)", [Line])
259-
end;
260-
261211
% Heredocs
262212

263213
tokenize("\"\"\"" ++ T, Line, Scope, Tokens) ->
@@ -346,9 +296,13 @@ tokenize("\r\n" ++ Rest, Line, Scope, Tokens) ->
346296

347297
% Stand-alone tokens
348298

299+
tokenize("..." ++ Rest, Line, Scope, Tokens) ->
300+
Token = check_call_identifier(identifier, Line, '...', Rest),
301+
tokenize(Rest, Line, Scope, [Token|Tokens]);
302+
349303
% ## Three token operators
350304
tokenize([T1,T2,T3|Rest], Line, Scope, Tokens) when ?unary_op3(T1, T2, T3) ->
351-
handle_nonl_op(Rest, Line, unary_op, list_to_atom([T1,T2,T3]), Scope, Tokens);
305+
handle_unary_op(Rest, Line, unary_op, list_to_atom([T1,T2,T3]), Scope, Tokens);
352306

353307
tokenize([T1,T2,T3|Rest], Line, Scope, Tokens) when ?comp_op3(T1, T2, T3) ->
354308
handle_op(Rest, Line, comp_op, list_to_atom([T1,T2,T3]), Scope, Tokens);
@@ -404,16 +358,16 @@ tokenize([$&,D|Rest], Line, Scope, Tokens) when ?is_digit(D) ->
404358
tokenize([D|Rest], Line, Scope, [{ '&', Line }|Tokens]);
405359

406360
tokenize([T|Rest], Line, Scope, Tokens) when ?at_op(T) ->
407-
handle_nonl_op(Rest, Line, at_op, list_to_atom([T]), Scope, Tokens);
361+
handle_unary_op(Rest, Line, at_op, list_to_atom([T]), Scope, Tokens);
408362

409363
tokenize([T|Rest], Line, Scope, Tokens) when ?unary_op(T) ->
410-
handle_nonl_op(Rest, Line, unary_op, list_to_atom([T]), Scope, Tokens);
364+
handle_unary_op(Rest, Line, unary_op, list_to_atom([T]), Scope, Tokens);
411365

412366
tokenize([T|Rest], Line, Scope, Tokens) when ?comp_op(T) ->
413367
handle_op(Rest, Line, comp_op, list_to_atom([T]), Scope, Tokens);
414368

415369
tokenize([T|Rest], Line, Scope, Tokens) when ?dual_op(T) ->
416-
handle_nonl_op(Rest, Line, dual_op, list_to_atom([T]), Scope, Tokens);
370+
handle_unary_op(Rest, Line, dual_op, list_to_atom([T]), Scope, Tokens);
417371

418372
tokenize([T|Rest], Line, Scope, Tokens) when ?mult_op(T) ->
419373
handle_op(Rest, Line, mult_op, list_to_atom([T]), Scope, Tokens);
@@ -424,8 +378,11 @@ tokenize([T|Rest], Line, Scope, Tokens) when ?match_op(T) ->
424378
tokenize([T|Rest], Line, Scope, Tokens) when ?pipe_op(T) ->
425379
handle_op(Rest, Line, pipe_op, list_to_atom([T]), Scope, Tokens);
426380

427-
tokenize([$.|Rest], Line, Scope, Tokens) ->
428-
tokenize(Rest, Line, Scope, add_token_with_nl({ '.', Line }, Tokens));
381+
% Dot
382+
383+
tokenize([$.|T], Line, Scope, Tokens) ->
384+
{ Rest, Counter } = strip_space(T, 0),
385+
handle_dot([$.|Rest], Line + Counter, Scope, Tokens);
429386

430387
% Integers and floats
431388

@@ -475,10 +432,22 @@ tokenize([Space, Sign, NotMarker|T], Line, Scope, [{ Identifier, _, _ } = H|Toke
475432
% Spaces
476433

477434
tokenize([T|Rest], Line, Scope, Tokens) when ?is_horizontal_space(T) ->
478-
tokenize(Rest, Line, Scope, Tokens);
435+
tokenize(strip_horizontal_space(Rest), Line, Scope, Tokens);
479436
tokenize(T, Line, _Scope, Tokens) ->
480437
{ error, { Line, "invalid token: ", until_eol(T) }, T, Tokens }.
481438

439+
strip_horizontal_space([H|T]) when ?is_horizontal_space(H) ->
440+
strip_horizontal_space(T);
441+
strip_horizontal_space(T) ->
442+
T.
443+
444+
strip_space(T, Counter) ->
445+
case strip_horizontal_space(T) of
446+
"\r\n" ++ Rest -> strip_space(Rest, Counter + 1);
447+
"\n" ++ Rest -> strip_space(Rest, Counter + 1);
448+
Rest -> { Rest, Counter }
449+
end.
450+
482451
until_eol("\r\n" ++ _) -> [];
483452
until_eol("\n" ++ _) -> [];
484453
until_eol([]) -> [];
@@ -520,10 +489,10 @@ handle_strings(T, Line, H, Scope, Tokens) ->
520489
tokenize(Rest, NewLine, Scope, [Token|Tokens])
521490
end.
522491

523-
handle_nonl_op([$:|Rest], Line, _Kind, Op, Scope, Tokens) when ?is_space(hd(Rest)) ->
492+
handle_unary_op([$:|Rest], Line, _Kind, Op, Scope, Tokens) when ?is_space(hd(Rest)) ->
524493
tokenize(Rest, Line, Scope, [{ kw_identifier, Line, Op }|Tokens]);
525494

526-
handle_nonl_op(Rest, Line, Kind, Op, Scope, Tokens) ->
495+
handle_unary_op(Rest, Line, Kind, Op, Scope, Tokens) ->
527496
tokenize(Rest, Line, Scope, [{ Kind, Line, Op }|Tokens]).
528497

529498
handle_op([$:|Rest], Line, _Kind, Op, Scope, Tokens) when ?is_space(hd(Rest)) ->
@@ -532,6 +501,45 @@ handle_op([$:|Rest], Line, _Kind, Op, Scope, Tokens) when ?is_space(hd(Rest)) ->
532501
handle_op(Rest, Line, Kind, Op, Scope, Tokens) ->
533502
tokenize(Rest, Line, Scope, add_token_with_nl({ Kind, Line, Op }, Tokens)).
534503

504+
% ## Three Token Operators
505+
handle_dot([$.,T1,T2,T3|Rest], Line, Scope, Tokens) when
506+
?unary_op3(T1, T2, T3); ?comp_op3(T1, T2, T3); ?and_op3(T1, T2, T3); ?or_op3(T1, T2, T3);
507+
?arrow_op3(T1, T2, T3); ?exp_op3(T1, T2, T3) ->
508+
handle_call_identifier(Rest, Line, list_to_atom([T1, T2, T3]), Scope, Tokens);
509+
510+
% ## Two Token Operators
511+
handle_dot([$.,T1,T2|Rest], Line, Scope, Tokens) when
512+
?comp_op2(T1, T2); ?and_op(T1, T2); ?or_op(T1, T2); ?arrow_op(T1, T2);
513+
?in_match_op(T1, T2); ?two_op(T1, T2); ?stab_op(T1, T2) ->
514+
handle_call_identifier(Rest, Line, list_to_atom([T1, T2]), Scope, Tokens);
515+
516+
% ## Single Token Operators
517+
handle_dot([$.,T|Rest], Line, Scope, Tokens) when
518+
?at_op(T); ?unary_op(T); ?dual_op(T); ?mult_op(T); ?comp_op(T);
519+
?match_op(T); ?pipe_op(T) ->
520+
handle_call_identifier(Rest, Line, list_to_atom([T]), Scope, Tokens);
521+
522+
% ## Exception for .( as it needs to be treated specially in the parser
523+
handle_dot([$.,$(|Rest], Line, Scope, Tokens) ->
524+
tokenize([$(|Rest], Line, Scope, add_token_with_nl({ dot_call_op, Line, '.' }, Tokens));
525+
526+
handle_dot([$.,H|T] = Original, Line, Scope, Tokens) when ?is_quote(H) ->
527+
case elixir_interpolation:extract(Line, Scope, true, T, H) of
528+
{ NewLine, [Part], Rest } when is_binary(Part) ->
529+
case unsafe_to_atom(Part, Line, Scope) of
530+
{ ok, Atom } ->
531+
Token = check_call_identifier(identifier, Line, Atom, Rest),
532+
tokenize(Rest, NewLine, Scope, [Token|add_token_with_nl({ '.', Line }, Tokens)]);
533+
{ error, Reason } ->
534+
{ error, Reason, Original, Tokens }
535+
end;
536+
{ error, Reason } ->
537+
interpolation_error(Reason, Original, Tokens, " (for function name starting at line ~B)", [Line])
538+
end;
539+
540+
handle_dot([$.|Rest], Line, Scope, Tokens) ->
541+
tokenize(Rest, Line, Scope, add_token_with_nl({ '.', Line }, Tokens)).
542+
535543
handle_call_identifier(Rest, Line, Op, Scope, Tokens) ->
536544
Token = check_call_identifier(identifier, Line, Op, Rest),
537545
tokenize(Rest, Line, Scope, [Token|add_token_with_nl({ '.', Line }, Tokens)]).

lib/elixir/test/erlang/tokenizer_test.erl

Lines changed: 5 additions & 1 deletion
Original file line number | Diff line number | Diff line change
@@ -81,7 +81,11 @@ identifier_test() ->
8181
[{paren_identifier,1,'a0c!'},{'(',1},{')',1}] = tokenize("a0c!()").
8282

8383
module_macro_test() ->
84-
[{identifier,1,'__MODULE__'}] = tokenize("__MODULE__").
84+
[{identifier,1,'__MODULE__'}] = tokenize("__MODULE__").
85+
86+
triple_dot_test() ->
87+
[{identifier,1,'...'}] = tokenize("..."),
88+
[{'.',1},{identifier,1,'..'}] = tokenize(". ..").
8589

8690
dot_test() ->
8791
[{identifier,1,foo},

0 commit comments

Comments
 (0)