@@ -119,20 +119,29 @@ tokenize(String, Line, Column, Opts) ->
119
119
false -> <<" nofile" >>
120
120
end ,
121
121
122
- Existing = case lists :keyfind (existing_atoms_only , 1 , Opts ) of
123
- {existing_atoms_only , true } -> true ;
124
- false -> false
122
+ ExistingAtomsOnly = case lists :keyfind (existing_atoms_only , 1 , Opts ) of
123
+ {existing_atoms_only , ExistingAtomsOnlyBool } when
124
+ is_boolean (ExistingAtomsOnlyBool ) -> ExistingAtomsOnlyBool ;
125
+ _ -> false
125
126
end ,
126
127
127
- Check = case lists :keyfind (check_terminators , 1 , Opts ) of
128
- {check_terminators , false } -> false ;
129
- false -> true
128
+ CheckTerminators = case lists :keyfind (check_terminators , 1 , Opts ) of
129
+ {check_terminators , CheckTerminatorsBool } when
130
+ is_boolean (CheckTerminatorsBool ) -> CheckTerminatorsBool ;
131
+ _ -> true
132
+ end ,
133
+
134
+ PreserveComments = case lists :keyfind (preserve_comments , 1 , Opts ) of
135
+ {preserve_comments , PreserveCommentsBool } when
136
+ is_boolean (PreserveCommentsBool ) -> PreserveCommentsBool ;
137
+ _ -> false
130
138
end ,
131
139
132
140
tokenize (String , Line , Column , # elixir_tokenizer {
133
141
file = File ,
134
- existing_atoms_only = Existing ,
135
- check_terminators = Check ,
142
+ existing_atoms_only = ExistingAtomsOnly ,
143
+ check_terminators = CheckTerminators ,
144
+ preserve_comments = PreserveComments ,
136
145
identifier_tokenizer = elixir_config :get (identifier_tokenizer )
137
146
}).
138
147
@@ -170,8 +179,14 @@ tokenize([$0, $o, H | T], Line, Column, Scope, Tokens) when ?is_octal(H) ->
170
179
% Comments
171
180
172
181
%% A `#` starts a comment. The comment text is always consumed; a
%% `comment` token is only emitted when the scope has preserve_comments
%% set to true, otherwise the comment is dropped and tokenizing resumes
%% with the column left untouched.
tokenize([$# | String], Line, Column, Scope, Tokens) ->
  {Rest, Comment, Length} = tokenize_comment(String, [$#], 1),
  case Scope#elixir_tokenizer.preserve_comments of
    false ->
      tokenize(Rest, Line, Column, Scope, Tokens);
    true ->
      EndColumn = Column + Length,
      tokenize(Rest, Line, EndColumn, Scope, [{comment, {Line, Column, EndColumn}, Comment} | Tokens])
  end;
175
190
176
191
% Sigils
177
192
@@ -453,8 +468,8 @@ tokenize([$% | T], Line, Column, Scope, Tokens) ->
453
468
tokenize (T , Line , Column + 1 , Scope , [{'%' , {Line , Column , Column + 1 }} | Tokens ]);
454
469
455
470
%% A dot: skip the space, newlines and comments that follow it, then let
%% handle_dot/8 decide how to tokenize what comes next. The dot's own
%% line is passed through unchanged; the number of skipped newlines and
%% the collected comment tokens travel alongside it.
tokenize([$. | T], Line, Column, Scope, Tokens) ->
  {AfterDot, NewLines, NextColumn, Comments} = strip_dot_space(T, 0, Column + 1, Line, []),
  handle_dot([$. | AfterDot], Line, NextColumn - 1, Column, Scope, Tokens, Comments, NewLines);
458
473
459
474
% Identifiers
460
475
@@ -498,12 +513,18 @@ strip_horizontal_space([H | T], Counter) when ?is_horizontal_space(H) ->
498
513
strip_horizontal_space (T , Counter ) ->
499
514
{T , Counter }.
500
515
501
%% Skips horizontal space, newlines and comments found after a dot.
%%
%% T is the remaining input, Counter the number of newlines skipped so
%% far, Column the column of the first character of T, StartLine the
%% line the dot is on and Tokens the comment tokens collected so far
%% (most recent first).
%%
%% Returns {Rest, Counter, Column, Tokens} where Rest starts at the first
%% character that is not space, newline or comment.
strip_dot_space(T, Counter, Column, StartLine, Tokens) ->
  case strip_horizontal_space(T) of
    {"#" ++ R, Spaces} ->
      %% The comment starts after the horizontal space that was just
      %% stripped, so that space must be added to its column.
      CommentColumn = Column + Spaces,
      {Rest, Comment, Length} = tokenize_comment(R, [$#], 1),
      CommentToken = {comment, {StartLine + Counter, CommentColumn, CommentColumn + Length}, Comment},
      strip_dot_space(Rest, Counter, 1, StartLine, [CommentToken | Tokens]);
    {"\r\n" ++ Rest, _} ->
      strip_dot_space(Rest, Counter + 1, 1, StartLine, Tokens);
    {"\n" ++ Rest, _} ->
      strip_dot_space(Rest, Counter + 1, 1, StartLine, Tokens);
    {Rest, Length} ->
      {Rest, Counter, Column + Length, Tokens}
  end.
508
529
509
530
handle_char (7 ) -> {" \\ a" , " alert" };
@@ -572,51 +593,59 @@ handle_op(Rest, Line, Column, Kind, Length, Op, Scope, Tokens) ->
572
593
add_token_with_nl ({Kind , {Line , Column , Column + Length }, Op }, Tokens ))
573
594
end .
574
595
596
%% Puts CommentTokens in front of Tokens when the scope asks for
%% comments to be preserved; otherwise returns Tokens unchanged and the
%% comment tokens are discarded.
handle_comments(CommentTokens, Tokens, Scope) ->
  Preserve = Scope#elixir_tokenizer.preserve_comments,
  case Preserve of
    false -> Tokens;
    true -> CommentTokens ++ Tokens
  end.
601
+
575
602
%% Tokenizes a dot together with whatever follows it.
%%
%% Line is the line of the dot itself and DotColumn its column. Counter
%% is the number of newlines skipped between the dot and the next token,
%% so that token lives on line `Line + Counter`. CommentTokens are the
%% comments collected while skipping; handle_comments/3 decides whether
%% they are placed between the dot token and the token that follows.

% ## Three Token Operators
handle_dot([$., T1, T2, T3 | Rest], Line, Column, DotColumn, Scope, Tokens, CommentTokens, Counter) when
    ?unary_op3(T1, T2, T3); ?comp_op3(T1, T2, T3); ?and_op3(T1, T2, T3); ?or_op3(T1, T2, T3);
    ?arrow_op3(T1, T2, T3); ?three_op(T1, T2, T3) ->
  handle_call_identifier(Rest, Line, Column + 1, DotColumn, 3, list_to_atom([T1, T2, T3]), Scope, Tokens, CommentTokens, Counter);

% ## Two Token Operators
handle_dot([$., T1, T2 | Rest], Line, Column, DotColumn, Scope, Tokens, CommentTokens, Counter) when
    ?comp_op2(T1, T2); ?rel_op2(T1, T2); ?and_op(T1, T2); ?or_op(T1, T2);
    ?arrow_op(T1, T2); ?in_match_op(T1, T2); ?two_op(T1, T2); ?stab_op(T1, T2);
    ?type_op(T1, T2) ->
  handle_call_identifier(Rest, Line, Column + 1, DotColumn, 2, list_to_atom([T1, T2]), Scope, Tokens, CommentTokens, Counter);

% ## Single Token Operators
handle_dot([$., T | Rest], Line, Column, DotColumn, Scope, Tokens, CommentTokens, Counter) when
    ?at_op(T); ?unary_op(T); ?capture_op(T); ?dual_op(T); ?mult_op(T);
    ?rel_op(T); ?match_op(T); ?pipe_op(T) ->
  handle_call_identifier(Rest, Line, Column + 1, DotColumn, 1, list_to_atom([T]), Scope, Tokens, CommentTokens, Counter);

% ## Exception for .( as it needs to be treated specially in the parser
handle_dot([$., $( | Rest], Line, Column, DotColumn, Scope, Tokens, CommentTokens, Counter) ->
  TokensSoFar = add_token_with_nl({dot_call_op, {Line, DotColumn, DotColumn + 1}, '.'}, Tokens),
  tokenize([$( | Rest], Line + Counter, Column + 2, Scope, handle_comments(CommentTokens, TokensSoFar, Scope));

% ## Quoted function names
handle_dot([$., H | T] = Original, Line, Column, DotColumn, Scope, Tokens, CommentTokens, Counter) when ?is_quote(H) ->
  %% The quoted name starts on the line reached after skipping Counter
  %% newlines. Extraction must start from that line, otherwise NewLine
  %% (and every token tokenized afterwards) loses the skipped newlines.
  NameLine = Line + Counter,
  case elixir_interpolation:extract(NameLine, Column + 2, Scope, true, T, H) of
    {NewLine, NewColumn, [Part], Rest} when is_binary(Part) ->
      case unsafe_to_atom(Part, NameLine, Scope) of
        {ok, Atom} ->
          Token = check_call_identifier(NameLine, Column, max(NewColumn - Column, 0), Atom, Rest),
          TokensSoFar = add_token_with_nl({'.', {Line, DotColumn, DotColumn + 1}}, Tokens),
          tokenize(Rest, NewLine, NewColumn, Scope, [Token | handle_comments(CommentTokens, TokensSoFar, Scope)]);
        {error, Reason} ->
          {error, Reason, Original, Tokens}
      end;
    {error, Reason} ->
      interpolation_error(Reason, Original, Tokens, " (for function name starting at line ~B)", [NameLine])
  end;

% ## Plain dot
handle_dot([$. | Rest], Line, Column, DotColumn, Scope, Tokens, CommentTokens, Counter) ->
  TokensSoFar = add_token_with_nl({'.', {Line, DotColumn, DotColumn + 1}}, Tokens),
  tokenize(Rest, Line + Counter, Column + 1, Scope, handle_comments(CommentTokens, TokensSoFar, Scope)).
615
644
616
%% Emits the dot token followed by the token for the operator Op used as
%% a function name after the dot.
%%
%% The dot token is placed on Line; the operator token is built for line
%% `Line + Counter`. Tokenizing resumes from the line and end column
%% recorded in the token returned by check_call_identifier/5.
handle_call_identifier(Rest, Line, Column, DotColumn, Length, Op, Scope, Tokens, CommentTokens, Counter) ->
  Token = check_call_identifier(Line + Counter, Column, Length, Op, Rest),
  {_, {NewLine, _, NewColumn}, _} = Token,
  DotToken = {'.', {Line, DotColumn, DotColumn + 1}},
  WithDot = add_token_with_nl(DotToken, Tokens),
  WithComments = handle_comments(CommentTokens, WithDot, Scope),
  tokenize(Rest, NewLine, NewColumn, Scope, [Token | WithComments]).
620
649
621
650
% ## Ambiguous unary/binary operators tokens
622
651
handle_space_sensitive_tokens ([Sign , NotMarker | T ], Line , Column , Scope , [{Identifier , _ , _ } = H | Tokens ]) when
@@ -825,10 +854,14 @@ tokenize_bin(Rest, Acc, Length) ->
825
854
826
855
% % Comments
827
856
828
%% Consumes a comment up to (but not including) the next line break or
%% the end of the input.
%%
%% Acc holds the comment characters read so far in reverse order and
%% Length their count. Returns {Rest, Comment, Length}, where Rest is the
%% input left after the comment and Comment is the comment text in
%% reading order.
tokenize_comment("\r\n" ++ _ = Rest, Acc, Length) ->
  {Rest, lists:reverse(Acc), Length};
tokenize_comment("\n" ++ _ = Rest, Acc, Length) ->
  {Rest, lists:reverse(Acc), Length};
tokenize_comment([H | Rest], Acc, Length) ->
  tokenize_comment(Rest, [H | Acc], Length + 1);
tokenize_comment([], Acc, Length) ->
  %% A comment that ends at end of input must be reversed as well.
  {[], lists:reverse(Acc), Length}.
832
865
833
866
% % Identifiers
834
867
0 commit comments