@@ -179,6 +179,20 @@ impl LexArgs<'_, '_> {
179
179
}
180
180
}
181
181
182
// `LexArgs` contains a mutable reference, so it cannot be cloned in the usual way, and no
// function signature for "cloning" it has been found that satisfies the borrow checker.
// Building the duplicate in place does work: the borrow checker can see that the mutable
// references do not overlap. This macro only exists to make that in-place construction
// convenient at call sites.
macro_rules! lex_args_copy {
    ($source:ident) => {
        LexArgs {
            input: $source.input,
            offset: $source.offset,
            lex_state: $source.lex_state,
        }
    };
}
195
+
182
196
type LexerFn = fn ( LexArgs ) -> OffsetAndTokenType ;
183
197
184
198
const COMMON_LEXER_MAP : [ Option < LexerFn > ; 256 ] = make_byte_map (
@@ -980,54 +994,150 @@ fn consume_to_eof(input: &str, token_type: RawTokenType) -> (usize, RawTokenType
980
994
981
995
// region: directives/comments
982
996
983
- fn compiler_directive_type ( input : & str , offset : usize ) -> RawTokenType {
984
- let count = count_matching ( input, offset, |b| b. is_ascii_alphabetic ( ) ) ;
985
-
986
- let directive = & input[ offset..( offset + count) ] ;
987
-
988
- if directive. eq_ignore_ascii_case ( "if" ) {
989
- TT :: ConditionalDirective ( CDK :: If )
990
- } else if directive. eq_ignore_ascii_case ( "ifdef" ) {
991
- TT :: ConditionalDirective ( CDK :: Ifdef )
992
- } else if directive. eq_ignore_ascii_case ( "ifndef" ) {
993
- TT :: ConditionalDirective ( CDK :: Ifndef )
994
- } else if directive. eq_ignore_ascii_case ( "ifopt" ) {
995
- TT :: ConditionalDirective ( CDK :: Ifopt )
996
- } else if directive. eq_ignore_ascii_case ( "elseif" ) {
997
- TT :: ConditionalDirective ( CDK :: Elseif )
998
- } else if directive. eq_ignore_ascii_case ( "else" ) {
999
- TT :: ConditionalDirective ( CDK :: Else )
1000
- } else if directive. eq_ignore_ascii_case ( "ifend" ) {
1001
- TT :: ConditionalDirective ( CDK :: Ifend )
1002
- } else if directive. eq_ignore_ascii_case ( "endif" ) {
1003
- TT :: ConditionalDirective ( CDK :: Endif )
1004
- } else {
1005
- TT :: CompilerDirective
997
+ fn conditional_directive_type (
998
+ input : & str ,
999
+ offset : usize ,
1000
+ ) -> ( usize , Option < ConditionalDirectiveKind > ) {
1001
+ let end_offset = offset + count_matching ( input, offset, |b| b. is_ascii_alphabetic ( ) ) ;
1002
+
1003
+ let directive = & input[ offset..end_offset] ;
1004
+
1005
+ let kind = {
1006
+ if directive. eq_ignore_ascii_case ( "if" ) {
1007
+ Some ( CDK :: If )
1008
+ } else if directive. eq_ignore_ascii_case ( "ifdef" ) {
1009
+ Some ( CDK :: Ifdef )
1010
+ } else if directive. eq_ignore_ascii_case ( "ifndef" ) {
1011
+ Some ( CDK :: Ifndef )
1012
+ } else if directive. eq_ignore_ascii_case ( "ifopt" ) {
1013
+ Some ( CDK :: Ifopt )
1014
+ } else if directive. eq_ignore_ascii_case ( "elseif" ) {
1015
+ Some ( CDK :: Elseif )
1016
+ } else if directive. eq_ignore_ascii_case ( "else" ) {
1017
+ Some ( CDK :: Else )
1018
+ } else if directive. eq_ignore_ascii_case ( "ifend" ) {
1019
+ Some ( CDK :: Ifend )
1020
+ } else if directive. eq_ignore_ascii_case ( "endif" ) {
1021
+ Some ( CDK :: Endif )
1022
+ } else {
1023
+ None
1024
+ }
1025
+ } ;
1026
+
1027
+ ( end_offset, kind)
1028
+ }
1029
+
1030
/// The delimiter pair that encloses a block comment or compiler directive.
#[derive(Clone, Copy, PartialEq, Eq)]
enum BlockCommentKind {
    /// Opened with `(*` and closed with `*)`.
    ParenStar,
    /// Opened with `{` and closed with `}`.
    Brace,
}
1035
+
1036
+ fn parse_directive_expr (
1037
+ mut args : LexArgs ,
1038
+ kind : BlockCommentKind ,
1039
+ ) -> ( RawTokenType , Option < usize > ) {
1040
+ let ( offset, cdk) = conditional_directive_type ( args. input , args. offset ) ;
1041
+ args. offset = offset;
1042
+
1043
+ match cdk {
1044
+ Some ( cdk @ ( CDK :: If | CDK :: Elseif ) ) => (
1045
+ TT :: ConditionalDirective ( cdk) ,
1046
+ find_directive_expr_end ( args, kind) ,
1047
+ ) ,
1048
+ Some ( cdk) => (
1049
+ TT :: ConditionalDirective ( cdk) ,
1050
+ find_block_comment_end ( args, kind) ,
1051
+ ) ,
1052
+ None => ( TT :: CompilerDirective , find_block_comment_end ( args, kind) ) ,
1006
1053
}
1007
1054
}
1008
1055
1009
- fn _compiler_directive < const START_LEN : usize > (
1010
- input : & str ,
1011
- start_offset : usize ,
1012
- end_offset : Option < usize > ,
1013
- ) -> OffsetAndTokenType {
1014
- let token_type = compiler_directive_type ( input, start_offset) ;
1015
- if let Some ( pos) = end_offset {
1016
- ( pos, token_type)
1017
- } else {
1018
- warn_unterminated ( "compiler directive" , input, start_offset - START_LEN ) ;
1019
- consume_to_eof ( input, token_type)
1056
+ fn find_directive_expr_end ( mut args : LexArgs , kind : BlockCommentKind ) -> Option < usize > {
1057
+ let input = args. input . as_bytes ( ) ;
1058
+ loop {
1059
+ match (
1060
+ input. get ( args. offset ) ,
1061
+ input. get ( args. offset + 1 ) ,
1062
+ input. get ( args. offset + 2 ) ,
1063
+ ) {
1064
+ // end alt block comment or directive
1065
+ ( Some ( b'*' ) , Some ( b')' ) , _) if kind == BlockCommentKind :: ParenStar => {
1066
+ return Some ( args. offset + 2 ) ;
1067
+ }
1068
+ // end block comment or directive
1069
+ ( Some ( b'}' ) , _, _) if kind == BlockCommentKind :: Brace => {
1070
+ return Some ( args. offset + 1 ) ;
1071
+ }
1072
+ // start alt directive
1073
+ ( Some ( b'(' ) , Some ( b'*' ) , Some ( b'$' ) ) => {
1074
+ args. offset += 3 ;
1075
+ args. offset =
1076
+ parse_directive_expr ( lex_args_copy ! ( args) , BlockCommentKind :: ParenStar ) . 1 ?;
1077
+ }
1078
+ // start directive
1079
+ ( Some ( b'{' ) , Some ( b'$' ) , _) => {
1080
+ args. offset += 2 ;
1081
+ args. offset =
1082
+ parse_directive_expr ( lex_args_copy ! ( args) , BlockCommentKind :: Brace ) . 1 ?;
1083
+ }
1084
+ // start alt block
1085
+ ( Some ( b'(' ) , Some ( b'*' ) , _) => {
1086
+ args. offset += 2 ;
1087
+ args. offset = block_comment_alt ( lex_args_copy ! ( args) ) . 0 ;
1088
+ }
1089
+ // start block
1090
+ ( Some ( b'{' ) , _, _) => {
1091
+ args. offset += 1 ;
1092
+ args. offset = block_comment ( lex_args_copy ! ( args) ) . 0 ;
1093
+ }
1094
+ // start string
1095
+ ( Some ( b'\'' ) , _, _) => {
1096
+ args. offset += 1 ;
1097
+ args. offset = text_literal ( lex_args_copy ! ( args) ) . 0 ;
1098
+ }
1099
+ // start line comment
1100
+ ( Some ( b'/' ) , Some ( b'/' ) , _) => {
1101
+ args. offset += 2 ;
1102
+ args. offset = line_comment ( lex_args_copy ! ( args) ) . 0 ;
1103
+ }
1104
+ ( None , _, _) => {
1105
+ return None ;
1106
+ }
1107
+ _ => {
1108
+ args. offset += 1 ;
1109
+ }
1110
+ }
1020
1111
}
1021
1112
}
1022
1113
1023
- fn compiler_directive_alt ( args : LexArgs ) -> OffsetAndTokenType {
1024
- let end_offset = memchr:: memmem:: find ( args. input . as_bytes ( ) , b"*)" ) . map ( |o| o + 2 ) ;
1025
- _compiler_directive :: < 2 > ( args. input , args. offset , end_offset)
1114
+ fn find_block_comment_end (
1115
+ LexArgs { input, offset, .. } : LexArgs ,
1116
+ kind : BlockCommentKind ,
1117
+ ) -> Option < usize > {
1118
+ match kind {
1119
+ BlockCommentKind :: ParenStar => {
1120
+ memchr:: memmem:: find ( & input. as_bytes ( ) [ offset..] , b"*)" ) . map ( |o| offset + o + 2 )
1121
+ }
1122
+ BlockCommentKind :: Brace => {
1123
+ memchr:: memchr ( b'}' , & input. as_bytes ( ) [ offset..] ) . map ( |o| offset + o + 1 )
1124
+ }
1125
+ }
1026
1126
}
1027
1127
1028
- fn compiler_directive ( args : LexArgs ) -> OffsetAndTokenType {
1029
- let end_offset = memchr:: memchr ( b'}' , args. input . as_bytes ( ) ) . map ( |o| o + 1 ) ;
1030
- _compiler_directive :: < 1 > ( args. input , args. offset , end_offset)
1128
+ fn compiler_directive ( args : LexArgs , kind : BlockCommentKind ) -> OffsetAndTokenType {
1129
+ let ( token_type, end_offset) = parse_directive_expr ( lex_args_copy ! ( args) , kind) ;
1130
+
1131
+ if let Some ( pos) = end_offset {
1132
+ ( pos, token_type)
1133
+ } else {
1134
+ let start_len = match kind {
1135
+ BlockCommentKind :: ParenStar => 2 ,
1136
+ BlockCommentKind :: Brace => 1 ,
1137
+ } ;
1138
+ warn_unterminated ( "compiler directive" , args. input , args. offset - start_len) ;
1139
+ consume_to_eof ( args. input , token_type)
1140
+ }
1031
1141
}
1032
1142
1033
1143
fn block_comment_kind (
@@ -1045,12 +1155,13 @@ fn block_comment_kind(
1045
1155
}
1046
1156
}
1047
1157
1048
- fn _block_comment < const START_LEN : usize > (
1158
+ fn _block_comment (
1049
1159
LexArgs {
1050
1160
input,
1051
1161
offset,
1052
1162
lex_state,
1053
1163
} : LexArgs ,
1164
+ start_len : usize ,
1054
1165
end_offset : Option < usize > ,
1055
1166
) -> OffsetAndTokenType {
1056
1167
if let Some ( end_offset) = end_offset {
@@ -1059,19 +1170,19 @@ fn _block_comment<const START_LEN: usize>(
1059
1170
let comment_kind = block_comment_kind ( nl_offset, offset, end_offset, lex_state) ;
1060
1171
( end_offset, TT :: Comment ( comment_kind) )
1061
1172
} else {
1062
- warn_unterminated ( "block comment" , input, offset - START_LEN ) ;
1173
+ warn_unterminated ( "block comment" , input, offset - start_len ) ;
1063
1174
consume_to_eof ( input, TT :: Comment ( CommentKind :: MultilineBlock ) )
1064
1175
}
1065
1176
}
1066
1177
1067
1178
fn block_comment_alt ( args : LexArgs ) -> OffsetAndTokenType {
1068
- let end_offset = memchr :: memmem :: find ( args . input . as_bytes ( ) , b"*)" ) . map ( |pos| pos + 2 ) ;
1069
- _block_comment :: < 2 > ( args, end_offset)
1179
+ let end_offset = find_block_comment_end ( lex_args_copy ! ( args ) , BlockCommentKind :: ParenStar ) ;
1180
+ _block_comment ( args, 2 , end_offset)
1070
1181
}
1071
1182
1072
1183
fn block_comment ( args : LexArgs ) -> OffsetAndTokenType {
1073
- let end_offset = memchr :: memchr ( b'}' , args . input . as_bytes ( ) ) . map ( |pos| pos + 1 ) ;
1074
- _block_comment :: < 1 > ( args, end_offset)
1184
+ let end_offset = find_block_comment_end ( lex_args_copy ! ( args ) , BlockCommentKind :: Brace ) ;
1185
+ _block_comment ( args, 1 , end_offset)
1075
1186
}
1076
1187
1077
1188
fn line_comment (
@@ -1096,14 +1207,14 @@ fn line_comment(
1096
1207
1097
1208
fn compiler_directive_or_comment_alt ( args : LexArgs ) -> OffsetAndTokenType {
1098
1209
match args. next_byte ( ) {
1099
- Some ( b'$' ) => compiler_directive_alt ( args. consume ( 1 ) ) ,
1210
+ Some ( b'$' ) => compiler_directive ( args. consume ( 1 ) , BlockCommentKind :: ParenStar ) ,
1100
1211
_ => block_comment_alt ( args) ,
1101
1212
}
1102
1213
}
1103
1214
1104
1215
fn compiler_directive_or_comment ( args : LexArgs ) -> OffsetAndTokenType {
1105
1216
match args. next_byte ( ) {
1106
- Some ( b'$' ) => compiler_directive ( args. consume ( 1 ) ) ,
1217
+ Some ( b'$' ) => compiler_directive ( args. consume ( 1 ) , BlockCommentKind :: Brace ) ,
1107
1218
_ => block_comment ( args) ,
1108
1219
}
1109
1220
}
@@ -1449,6 +1560,102 @@ mod tests {
1449
1560
} ,
1450
1561
& [ ( "(*$if\n // other comment\n Foo;" , IF_DIRECTIVE ) ] ,
1451
1562
) ;
1563
+ // nested unterminated block comment
1564
+ run_test ( "{$if (*if } //" , & [ ( "{$if (*if } //" , IF_DIRECTIVE ) ] ) ;
1565
+ // nested unterminated directive
1566
+ run_test ( "{$if (*$if } //" , & [ ( "{$if (*$if } //" , IF_DIRECTIVE ) ] ) ;
1567
+ // nested line comment
1568
+ run_test ( "{$if // } //" , & [ ( "{$if // } //" , IF_DIRECTIVE ) ] ) ;
1569
+ // nested string literal
1570
+ run_test ( "{$if '} //" , & [ ( "{$if '} //" , IF_DIRECTIVE ) ] ) ;
1571
+ }
1572
+
1573
+ #[ test]
1574
+ fn lex_complex_directive_expressions ( ) {
1575
+ /*
1576
+ Since Delphi conditional directives contain expressions, they can also contain comments
1577
+ and other conditional directives (to a limited and buggy extent). This means that we
1578
+ can't just treat them as block comments when finding the bounds of the token for the
1579
+ directive.
1580
+
1581
+ The only directives that can contain these expressions are `if` and `elseif`, all the
1582
+ others can safely be lexed as a simple block comment.
1583
+ */
1584
+
1585
+ run_test (
1586
+ indoc ! { "
1587
+ {$if {$i foo} = 0}
1588
+ {$if (*$i foo*) = 0}
1589
+ (*$if (*$i foo*) = 0*)
1590
+ (*$if {$i foo} = 0*)
1591
+
1592
+ {$if {(*} (*{*) {(* {$if }}
1593
+
1594
+ {$if {{} }
1595
+ {$if {$if {}} }
1596
+
1597
+ {$if {$i foo} = 0*) }
1598
+ (*$if {$i foo} = 0} *)
1599
+
1600
+ {$if
1601
+ {$if True}
1602
+ FOO
1603
+ (*$elseif {$if True}FOO{$else}BAR{$endif} *)
1604
+ BAR
1605
+ {$endif}
1606
+ = 0
1607
+ }
1608
+
1609
+ {$ifdef {$i inc}
1610
+ {$if {$ifdef {}}
1611
+
1612
+ {$if a = '}'#10#13''''}
1613
+ {$if a = #10}
1614
+
1615
+ {$if a = '''
1616
+ }
1617
+ '''
1618
+ }
1619
+
1620
+ " } ,
1621
+ & [
1622
+ // Ensure directives and comments can be mixed in any order
1623
+ ( "{$if {$i foo} = 0}" , IF_DIRECTIVE ) ,
1624
+ ( "{$if (*$i foo*) = 0}" , IF_DIRECTIVE ) ,
1625
+ ( "(*$if (*$i foo*) = 0*)" , IF_DIRECTIVE ) ,
1626
+ ( "(*$if {$i foo} = 0*)" , IF_DIRECTIVE ) ,
1627
+ ( "{$if {(*} (*{*) {(* {$if }}" , IF_DIRECTIVE ) ,
1628
+ // Ensure that block comments cannot be started from within nested block comments
1629
+ ( "{$if {{} }" , IF_DIRECTIVE ) ,
1630
+ // Ensure that block comments can be started from within nested conditional directives
1631
+ ( "{$if {$if {}} }" , IF_DIRECTIVE ) ,
1632
+ // Ensure that the nested directives are closed with the right half of the pair
1633
+ ( "{$if {$i foo} = 0*) }" , IF_DIRECTIVE ) ,
1634
+ ( "(*$if {$i foo} = 0} *)" , IF_DIRECTIVE ) ,
1635
+ // Ensure nesting works recursively
1636
+ (
1637
+ indoc ! { "
1638
+ {$if
1639
+ {$if True}
1640
+ FOO
1641
+ (*$elseif {$if True}FOO{$else}BAR{$endif} *)
1642
+ BAR
1643
+ {$endif}
1644
+ = 0
1645
+ }"
1646
+ } ,
1647
+ IF_DIRECTIVE ,
1648
+ ) ,
1649
+ // Ensure that the nesting doesn't work with non-expression directives
1650
+ ( "{$ifdef {$i inc}" , IFDEF_DIRECTIVE ) ,
1651
+ // Ensure that the nesting doesn't work within a nested non-expression directive
1652
+ ( "{$if {$ifdef {}}" , IF_DIRECTIVE ) ,
1653
+ // Ensure that nested text literals work
1654
+ ( "{$if a = '}'#10#13''''}" , IF_DIRECTIVE ) ,
1655
+ ( "{$if a = #10}" , IF_DIRECTIVE ) ,
1656
+ ( "{$if a = '''\n }\n '''\n }" , IF_DIRECTIVE ) ,
1657
+ ] ,
1658
+ )
1452
1659
}
1453
1660
1454
1661
#[ test]
0 commit comments