15
15
T1 == $[ , T2 == $]
16
16
).
17
17
18
- % % New ops table
19
-
20
18
-define (at_op (T ),
21
19
T == $@ ).
22
20
23
21
-define (unary_op (T ),
24
- % T == $&;
22
+ T == $& ;
25
23
T == $! ;
26
24
T == $^ ).
27
25
@@ -259,7 +257,7 @@ tokenize([$.,T1,T2|Rest], Line, Scope, Tokens) when
259
257
% ## Single Token Operators
260
258
tokenize ([$. ,T |Rest ], Line , Scope , Tokens ) when
261
259
? at_op (T ); ? unary_op (T ); ? dual_op (T ); ? mult_op (T ); ? comp_op (T );
262
- ? match_op (T ); ? tail_op (T ); T == $& ->
260
+ ? match_op (T ); ? tail_op (T ) ->
263
261
handle_call_identifier (Rest , Line , list_to_atom ([T ]), Scope , Tokens );
264
262
265
263
% Dot call
@@ -339,7 +337,7 @@ tokenize([$:,T1,T2|Rest], Line, Scope, Tokens) when
339
337
% ## Single Token Operators
340
338
tokenize ([$: ,T |Rest ], Line , Scope , Tokens ) when
341
339
? at_op (T ); ? unary_op (T ); ? dual_op (T ); ? mult_op (T ); ? comp_op (T );
342
- ? match_op (T ); ? tail_op (T ); T == $& ; T == $ . ->
340
+ ? match_op (T ); ? tail_op (T ); T == $. ->
343
341
tokenize (Rest , Line , Scope , [{ atom , Line , list_to_atom ([T ]) }|Tokens ]);
344
342
345
343
% End of line
@@ -364,10 +362,6 @@ tokenize("\r\n" ++ Rest, Line, Scope, Tokens) ->
364
362
365
363
% Stand-alone tokens
366
364
367
- % ## &
368
- tokenize ([$& ,H |Rest ], Line , Scope , Tokens ) when ? is_digit (H ) ->
369
- tokenize (Rest , Line , Scope , [{ '&' , Line , [list_to_integer ([H ])] }|Tokens ]);
370
-
371
365
% ## Three token operators
372
366
tokenize ([T1 ,T2 ,T3 |Rest ], Line , Scope , Tokens ) when ? unary_op3 (T1 , T2 , T3 ) ->
373
367
handle_nonl_op (Rest , Line , unary_op , list_to_atom ([T1 ,T2 ,T3 ]), Scope , Tokens );
@@ -474,17 +468,13 @@ tokenize([H|_] = String, Line, Scope, Tokens) when ?is_upcase(H) ->
474
468
% Identifier
475
469
476
470
tokenize ([H |_ ] = String , Line , Scope , Tokens ) when ? is_downcase (H ); H == $_ ->
477
- case tokenize_any_identifier (String , Line , [], Scope ) of
478
- { error , _ } = Error -> Error ;
479
- { Rest , { Kind , _ , Identifier } } ->
480
- case check_keyword (Line , Kind , Identifier , Tokens ) of
481
- nomatch ->
482
- tokenize (Rest , Line , Scope , [{ Kind , Line , Identifier }|Tokens ]);
483
- { ok , [Check |T ] } ->
484
- handle_terminator (Rest , Line , Scope , Check , T );
485
- { error , Token } ->
486
- { error , { Line , " syntax error before: " , Token } }
487
- end
471
+ case tokenize_any_identifier (String , Line , Scope , Tokens ) of
472
+ { keyword , Rest , Check , T } ->
473
+ handle_terminator (Rest , Line , Scope , Check , T );
474
+ { identifier , Rest , Token } ->
475
+ tokenize (Rest , Line , Scope , [Token |Tokens ]);
476
+ { error , _ } = Error ->
477
+ Error
488
478
end ;
489
479
490
480
% Ambiguous unary/binary operators tokens
@@ -763,50 +753,39 @@ tokenize_identifier(Rest, Acc) ->
763
753
764
754
% % Tokenize any identifier, handling kv, punctuated, paren, bracket and do identifiers.
765
755
766
- tokenize_any_identifier (String , Line , Acc , Scope ) ->
767
- { Rest , Identifier } = tokenize_identifier (String , Acc ),
756
+ tokenize_any_identifier (String , Line , Scope , Tokens ) ->
757
+ { Rest , Identifier } = tokenize_identifier (String , [] ),
768
758
769
759
case Rest of
770
760
[H |T ] when H == $? ; H == $! ->
771
761
Atom = unsafe_to_atom (Identifier ++ [H ], Scope ),
772
- tokenize_kw_or_call_identifier ( punctuated_identifier , Line , Atom , T );
762
+ tokenize_kw_or_other ( T , punctuated_identifier , Line , Atom , Tokens );
773
763
_ ->
774
764
Atom = unsafe_to_atom (Identifier , Scope ),
775
- tokenize_kw_or_call_identifier ( identifier , Line , Atom , Rest )
765
+ tokenize_kw_or_other ( Rest , identifier , Line , Atom , Tokens )
776
766
end .
777
767
778
- % % Tokenize kw or call identifier (paren | bracket | do)
779
-
780
- tokenize_kw_or_call_identifier (_Kind , Line , Atom , [$: ,H |T ]) when ? is_space (H ) ->
781
- { [H |T ], { kw_identifier , Line , Atom } };
768
+ tokenize_kw_or_other ([$: ,H |T ], _Kind , Line , Atom , _Tokens ) when ? is_space (H ) ->
769
+ { identifier , [H |T ], { kw_identifier , Line , Atom } };
782
770
783
- tokenize_kw_or_call_identifier ( _Kind , Line , Atom , [ $: , H | _ ] ) when ? is_atom_start (H ) ->
771
+ tokenize_kw_or_other ([ $: , H | _ ], _Kind , Line , Atom , _Tokens ) when ? is_atom_start (H ) ->
784
772
{ error , { Line , " keyword argument must be followed by space after: " , atom_to_list (Atom ) ++ [$: ] } };
785
773
786
- tokenize_kw_or_call_identifier (Kind , Line , Atom , Rest ) ->
787
- { Rest , check_call_identifier (Kind , Line , Atom , Rest ) }.
774
+ tokenize_kw_or_other (Rest , Kind , Line , Atom , Tokens ) ->
775
+ case check_keyword (Line , Atom , Tokens ) of
776
+ nomatch ->
777
+ { identifier , Rest , check_call_identifier (Kind , Line , Atom , Rest ) };
778
+ { ok , [Check |T ] } ->
779
+ { keyword , Rest , Check , T };
780
+ { error , Token } ->
781
+ { error , { Line , " syntax error before: " , Token } }
782
+ end .
788
783
789
784
% % Check if it is a call identifier (paren | bracket | do)
790
785
791
786
check_call_identifier (_Kind , Line , Atom , [$( |_ ]) -> { paren_identifier , Line , Atom };
792
787
check_call_identifier (_Kind , Line , Atom , [$[ |_ ]) -> { bracket_identifier , Line , Atom };
793
- check_call_identifier (Kind , Line , Atom , Rest ) ->
794
- case next_is_block (Rest ) of
795
- false -> { Kind , Line , Atom };
796
- BlockIdentifier -> { BlockIdentifier , Line , Atom }
797
- end .
798
-
799
- next_is_block ([Space |Tokens ]) when Space == $\t ; Space == $\s ->
800
- next_is_block (Tokens );
801
-
802
- next_is_block ([$d ,$o ,H |_ ]) when ? is_identifier (H ); ? is_terminator (H ) ->
803
- false ;
804
-
805
- next_is_block ([$d ,$o |_ ]) ->
806
- do_identifier ;
807
-
808
- next_is_block (_ ) ->
809
- false .
788
+ check_call_identifier (Kind , Line , Atom , _Rest ) -> { Kind , Line , Atom }.
810
789
811
790
add_token_with_nl (Left , [{eol ,_ ,newline }|T ]) -> [Left |T ];
812
791
add_token_with_nl (Left , T ) -> [Left |T ].
@@ -884,40 +863,47 @@ terminator('<<') -> '>>'.
884
863
885
864
% % Keywords checking
886
865
887
- check_keyword (Line , Identifier , Atom , [{ '.' , _ }|_ ] = Tokens ) ->
888
- { ok , [{ Identifier , Line , Atom }|Tokens ] };
866
+ check_keyword (_Line , _Atom , [{ '.' , _ }|_ ]) ->
867
+ nomatch ;
868
+
869
+ check_keyword (Line , do , [{ identifier , Line , Atom }|T ]) ->
870
+ { ok , [{ do , Line }, { do_identifier , Line , Atom }|T ] };
889
871
890
- check_keyword (Line , Identifier , Atom , Tokens ) when
891
- Identifier == identifier ; Identifier == do_identifier ;
892
- Identifier == bracket_identifier ; Identifier == paren_identifier ->
872
+ check_keyword (Line , do , Tokens ) ->
873
+ case do_keyword_valid (Tokens ) of
874
+ true -> { ok , [{ do , Line }|Tokens ] };
875
+ false -> { error , " do" }
876
+ end ;
877
+
878
+ check_keyword (Line , Atom , Tokens ) ->
893
879
case keyword (Atom ) of
894
- do ->
895
- case do_keyword_valid (Tokens ) of
896
- true -> { ok , [{ Atom , Line }|Tokens ] };
897
- false -> { error , " do" }
898
- end ;
899
880
false -> nomatch ;
900
- true -> { ok , [{ Atom , Line }|Tokens ] };
881
+ token -> { ok , [{ Atom , Line }|Tokens ] };
901
882
block -> { ok , [{ block_identifier , Line , Atom }|Tokens ] };
902
883
unary_op -> { ok , [{ unary_op , Line , Atom }|Tokens ] };
903
884
Kind -> { ok , add_token_with_nl ({ Kind , Line , Atom }, Tokens ) }
904
- end ;
905
-
906
- check_keyword (_ , _ , _ , _ ) -> nomatch .
885
+ end .
907
886
887
+ % % do is only valid after the end, true, false and nil keywords
908
888
do_keyword_valid ([{ Atom , _ }|_ ]) ->
909
- is_boolean (keyword (Atom ));
910
- do_keyword_valid (_ ) -> true .
889
+ case Atom of
890
+ 'end' -> true ;
891
+ nil -> true ;
892
+ true -> true ;
893
+ false -> true ;
894
+ _ -> keyword (Atom ) == false
895
+ end ;
896
+
897
+ do_keyword_valid (_ ) ->
898
+ true .
911
899
912
900
% Regular keywords
913
- keyword ('fn' ) -> true ;
914
- keyword ('end' ) -> true ;
915
- keyword ('true' ) -> true ;
916
- keyword ('false' ) -> true ;
917
- keyword ('nil' ) -> true ;
918
-
919
- % Special handling for do
920
- keyword ('do' ) -> do ;
901
+ keyword ('fn' ) -> token ;
902
+ keyword ('do' ) -> token ;
903
+ keyword ('end' ) -> token ;
904
+ keyword ('true' ) -> token ;
905
+ keyword ('false' ) -> token ;
906
+ keyword ('nil' ) -> token ;
921
907
922
908
% Operators keywords
923
909
keyword ('not' ) -> unary_op ;
0 commit comments