@@ -882,9 +882,8 @@ fn node_space1(input: &mut Input<'_>) -> PResult<()> {
882
882
repeat ( 1 .., node_space) . parse_next ( input)
883
883
}
884
884
885
- /// ` string := identifier-string | quoted-string | raw-string`
885
+ /// string := identifier-string | quoted-string | raw-string
886
886
pub ( crate ) fn string ( input : & mut Input < ' _ > ) -> PResult < Option < KdlValue > > {
887
- // TODO: shouldn't put the `resume_after_cut`s here, because they mess with context from higher levels.
888
887
trace (
889
888
"string" ,
890
889
alt ( (
@@ -1018,32 +1017,65 @@ fn equals_sign(input: &mut Input<'_>) -> PResult<()> {
1018
1017
}
1019
1018
1020
1019
/// ```text
1021
- /// quoted-string := '"' single-line-string-body '"' | '"""' newline multi-line-string-body newline unicode-space*) '"""'
1020
+ /// quoted-string := '"' single-line-string-body '"' | '"""' newline multi-line-string-body newline (unicode-space | ('\' (unicode-space | newline)+))* '"""'
1022
1021
/// single-line-string-body := (string-character - newline)*
1023
- /// multi-line-string-body := string-character*
1022
+ /// multi-line-string-body := (('"' | '""')? string-character) *
1024
1023
/// ```
1025
- fn quoted_string < ' s > ( input : & mut Input < ' s > ) -> PResult < KdlValue > {
1026
- let quotes = alt ( ( ( "\" \" \" " , newline) . take ( ) , "\" " ) ) . parse_next ( input) ?;
1024
+ fn quoted_string ( input : & mut Input < ' _ > ) -> PResult < KdlValue > {
1025
+ let quotes =
1026
+ alt ( (
1027
+ (
1028
+ "\" \" \" " ,
1029
+ cut_err ( newline) . context ( cx ( ) . lbl ( "multi-line string newline" ) . msg (
1030
+ "Multi-line string opening quotes must be immediately followed by a newline" ,
1031
+ ) ) ,
1032
+ )
1033
+ . take ( ) ,
1034
+ "\" " ,
1035
+ ) )
1036
+ . parse_next ( input) ?;
1027
1037
let is_multiline = quotes. len ( ) > 1 ;
1028
1038
let ml_prefix: Option < String > = if is_multiline {
1029
1039
Some (
1030
- peek ( preceded (
1040
+ cut_err ( peek ( preceded (
1031
1041
repeat_till (
1032
1042
0 ..,
1033
1043
(
1034
- repeat ( 0 .., ( not ( newline) , opt ( ws_escape) , string_char) ) . map ( |( ) | ( ) ) ,
1044
+ repeat (
1045
+ 0 ..,
1046
+ (
1047
+ not ( newline) ,
1048
+ alt ( (
1049
+ ws_escape. void ( ) ,
1050
+ trace (
1051
+ "valid string body char(s)" ,
1052
+ alt ( (
1053
+ ( '\"' , not ( "\" \" " ) ) . void ( ) ,
1054
+ ( '\"' , not ( "\" " ) ) . void ( ) ,
1055
+ string_char. void ( ) ,
1056
+ ) ) ,
1057
+ )
1058
+ . void ( ) ,
1059
+ ) ) ,
1060
+ ) ,
1061
+ )
1062
+ . map ( |( ) | ( ) ) ,
1035
1063
newline,
1036
1064
) ,
1037
1065
peek ( terminated (
1038
- repeat ( 0 .., unicode_space) . map ( |( ) | ( ) ) ,
1066
+ repeat ( 0 .., alt ( ( ws_escape , unicode_space) ) ) . map ( |( ) | ( ) ) ,
1039
1067
"\" \" \" " ,
1040
1068
) ) ,
1041
1069
)
1042
1070
. map ( |( ( ) , ( ) ) | ( ) ) ,
1043
- terminated ( repeat ( 0 .., unicode_space) . map ( |( ) | ( ) ) . take ( ) , "\" \" \" " ) ,
1044
- ) )
1045
- . parse_next ( input) ?
1046
- . to_string ( ) ,
1071
+ terminated (
1072
+ repeat ( 0 .., alt ( ( ws_escape. map ( |_| "" ) , unicode_space. take ( ) ) ) )
1073
+ . map ( |s : String | s) ,
1074
+ "\" \" \" " ,
1075
+ ) ,
1076
+ ) ) )
1077
+ . context ( cx ( ) . lbl ( "multi-line string" ) )
1078
+ . parse_next ( input) ?,
1047
1079
)
1048
1080
} else {
1049
1081
None
@@ -1052,30 +1084,40 @@ fn quoted_string<'s>(input: &mut Input<'s>) -> PResult<KdlValue> {
1052
1084
let parser = repeat_till (
1053
1085
0 ..,
1054
1086
(
1055
- cut_err ( alt ( ( & prefix[ ..] , peek ( newline ) . take ( ) ) ) )
1087
+ cut_err ( alt ( ( ( & prefix[ ..] ) . void ( ) , peek ( empty_line ) . void ( ) ) ) )
1056
1088
. context ( cx ( ) . msg ( "matching multiline string prefix" ) . lbl ( "bad prefix" ) . hlp ( "Multi-line string bodies must be prefixed by the exact same whitespace as the leading whitespace before the closing '\" \" \" '" ) ) ,
1057
1089
alt ( (
1058
- newline . take ( ) . map ( |_| " \n " . to_string ( ) ) ,
1090
+ empty_line . map ( |s| s . to_string ( ) ) ,
1059
1091
repeat_till (
1060
1092
0 ..,
1061
- ( not ( newline) , opt ( ws_escape) , string_char) . map ( |( _, _, s) | s) ,
1093
+ (
1094
+ not ( newline) ,
1095
+ alt ( (
1096
+ ws_escape. map ( |_| None ) ,
1097
+ alt ( (
1098
+ ( '\"' , not ( "\" \" " ) ) . map ( |( c, ( ) ) | Some ( c) ) ,
1099
+ ( '\"' , not ( "\" " ) ) . map ( |( c, ( ) ) | Some ( c) ) ,
1100
+ string_char. map ( Some ) ,
1101
+ ) )
1102
+ ) )
1103
+ ) . map ( |( _, c) | c) ,
1062
1104
newline,
1063
1105
)
1064
1106
// multiline string literal newlines are normalized to `\n`
1065
- . map ( |( s , _) : ( String , _ ) | format ! ( "{s} \n " ) ) ,
1107
+ . map ( |( cs , _) : ( Vec < Option < char > > , _ ) | cs . into_iter ( ) . flatten ( ) . chain ( vec ! [ '\n' ] ) . collect :: < String > ( ) ) ,
1066
1108
) ) ,
1067
1109
)
1068
1110
. map ( |( _, s) | s) ,
1069
1111
(
1070
1112
& prefix[ ..] ,
1071
- repeat ( 0 .., unicode_space ) . map ( |( ) | ( ) ) . take ( ) ,
1113
+ repeat ( 0 .., ws_escape . void ( ) ) . map ( |( ) | ( ) ) ,
1072
1114
peek ( "\" \" \" " ) ,
1073
1115
) ,
1074
1116
)
1075
1117
. map ( |( s, _) : ( Vec < String > , ( _ , _ , _ ) ) | {
1076
1118
let mut s = s. join ( "" ) ;
1077
1119
// Slice off the `\n` at the end of the last line.
1078
- s. truncate ( s. len ( ) - 1 ) ;
1120
+ s. truncate ( s. len ( ) . saturating_sub ( 1 ) ) ;
1079
1121
s
1080
1122
} )
1081
1123
. context ( cx ( ) . lbl ( "multi-line quoted string" ) ) ;
@@ -1090,13 +1132,14 @@ fn quoted_string<'s>(input: &mut Input<'s>) -> PResult<KdlValue> {
1090
1132
. hlp ( "You can make a string multi-line by wrapping it in '\" \" \" ', with a newline immediately after the opening quotes." ) ,
1091
1133
) ,
1092
1134
) ,
1093
- opt ( ws_escape) ,
1094
- string_char,
1095
- )
1096
- . map ( |( _, _, s) | s) ,
1097
- ( repeat ( 0 .., unicode_space) . map ( |( ) | ( ) ) . take ( ) , peek ( "\" " ) ) ,
1135
+ alt ( (
1136
+ ws_escape. map ( |_| None ) ,
1137
+ string_char. map ( Some ) ,
1138
+ ) )
1139
+ ) . map ( |( _, c) | c) ,
1140
+ peek ( "\" " )
1098
1141
)
1099
- . map ( |( s , ( end , _ ) ) : ( String , ( & ' s str , _ ) ) | format ! ( "{s}{end}" ) )
1142
+ . map ( |( cs , _ ) : ( Vec < Option < char > > , _ ) | cs . into_iter ( ) . flatten ( ) . collect :: < String > ( ) )
1100
1143
. context ( cx ( ) . lbl ( "quoted string" ) ) ;
1101
1144
cut_err ( parser) . parse_next ( input) ?
1102
1145
} ;
@@ -1112,8 +1155,19 @@ fn quoted_string<'s>(input: &mut Input<'s>) -> PResult<KdlValue> {
1112
1155
Ok ( KdlValue :: String ( body) )
1113
1156
}
1114
1157
1158
/// Matches one "empty" line: any run of `ws_escape` / `unicode_space` matches
/// followed by a `newline`.
///
/// Everything consumed is discarded; on success the fixed string literal in
/// the final `Ok(..)` is returned, independent of what was actually matched.
///
/// NOTE(review): `ws_escape` itself accepts newlines after the backslash, so a
/// separate `newline` must still follow any escapes for this to succeed —
/// confirm that is the intended handling of escaped line endings.
+ fn empty_line ( input : & mut Input < ' _ > ) -> PResult < & ' static str > {
1159
// Skip zero or more whitespace escapes / unicode spaces; the unit-collecting
// `map` only pins the accumulator type of `repeat` to `()`.
+ repeat ( 0 .., alt ( ( ws_escape. void ( ) , unicode_space. void ( ) ) ) )
1160
+ . map ( |( ) | ( ) )
1161
+ . parse_next ( input) ?;
1162
// The line must then be terminated by a newline.
+ newline. parse_next ( input) ?;
1163
+ Ok ( "\n " )
1164
+ }
1165
+
1115
1166
/// Like badval, but is able to slurp up invalid raw strings, which contain whitespace.
1116
1167
fn quoted_string_badval ( input : & mut Input < ' _ > ) -> PResult < ( ) > {
1168
+ // TODO(@zkat): this should have different behavior based on whether we're
1169
+ // resuming a single or multi-line string. Right now, multi-liners end up
1170
+ // with silly errors.
1117
1171
(
1118
1172
repeat_till (
1119
1173
0 ..,
@@ -1135,19 +1189,25 @@ fn quoted_string_terminator(input: &mut Input<'_>) -> PResult<()> {
1135
1189
/// ```
1136
1190
fn string_char ( input : & mut Input < ' _ > ) -> PResult < char > {
1137
1191
// Two alternatives, tried in order:
//   1. an `escaped_char`;
//   2. any single char that is not matched by `disallowed_unicode` and is
//      neither a backslash nor a double quote.
// The added (`+`) lines keep both alternatives as they were and only wrap each
// one in `trace(..)` with a label.
alt ( (
1138
- escaped_char,
1139
- ( not ( disallowed_unicode) , none_of ( [ '\\' , '"' ] ) ) . map ( |( _, c) | c) ,
1192
+ trace ( "escaped char" , escaped_char) ,
1193
+ trace (
1194
+ "regular string char" ,
// `not(..)` yields `()`, so the map keeps only the char from `none_of`.
+ ( not ( disallowed_unicode) , none_of ( [ '\\' , '"' ] ) ) . map ( |( _, c) | c) ,
1196
+ ) ,
1140
1197
) )
1141
1198
. parse_next ( input)
1142
1199
}
1143
1200
1144
1201
/// Matches a whitespace escape: the leading escape literal followed by one or
/// more `unicode_space` or `newline` matches.
///
/// The matched text is thrown away via `void()`, so callers only learn whether
/// the escape was present. The added (`+`) lines wrap the same two-part
/// sequence in `trace("ws_escape", ..)` without changing what is matched.
fn ws_escape ( input : & mut Input < ' _ > ) -> PResult < ( ) > {
1145
- (
1146
- "\\ " ,
1147
- repeat ( 1 .., alt ( ( unicode_space, newline) ) ) . map ( |( ) | ( ) ) ,
1202
+ trace (
1203
+ "ws_escape" ,
1204
+ (
1205
+ "\\ " ,
1206
// At least one space/newline is required after the escape literal (`1 ..`).
+ repeat ( 1 .., alt ( ( unicode_space, newline) ) ) . map ( |( ) | ( ) ) ,
1207
+ ) ,
1148
1208
)
1149
- . void ( )
1150
- . parse_next ( input)
1209
+ . void ( )
1210
+ . parse_next ( input)
1151
1211
}
1152
1212
1153
1213
/// ```text
@@ -1182,10 +1242,13 @@ fn escaped_char(input: &mut Input<'_>) -> PResult<char> {
1182
1242
. parse_next ( input)
1183
1243
}
1184
1244
1185
- /// `raw-string := '#' raw-string-quotes '#' | '#' raw-string '#'`
1186
- /// `raw-string-quotes := '"' single-line-raw-string-body '"' | '"""' newline multi-line-raw-string-body newline unicode-space*) '"""'`
1187
- /// `single-line-raw-string-body := (unicode - newline - disallowed-literal-code-points)*`
1188
- /// `multi-line-raw-string-body := (unicode - disallowed-literal-code-points)`
1245
+ /// ```text
1246
+ /// raw-string := '#' raw-string-quotes '#' | '#' raw-string '#'
1247
+ /// raw-string-quotes := '"' single-line-raw-string-body '"' | '"""' newline multi-line-raw-string-body '"""'
1248
+ /// single-line-raw-string-body := '' | (single-line-raw-string-char - '"') single-line-raw-string-char*? | '"' (single-line-raw-string-char - '"') single-line-raw-string-char*?
1249
+ /// single-line-raw-string-char := unicode - newline - disallowed-literal-code-points
1250
+ /// multi-line-raw-string-body := (unicode - disallowed-literal-code-points)*?
1251
+ /// ```
1189
1252
fn raw_string ( input : & mut Input < ' _ > ) -> PResult < KdlValue > {
1190
1253
let hashes: String = repeat ( 1 .., "#" ) . parse_next ( input) ?;
1191
1254
let quotes = alt ( ( ( "\" \" \" " , newline) . take ( ) , "\" " ) ) . parse_next ( input) ?;
@@ -1229,10 +1292,10 @@ fn raw_string(input: &mut Input<'_>) -> PResult<KdlValue> {
1229
1292
repeat_till (
1230
1293
0 ..,
1231
1294
(
1232
- cut_err ( alt ( ( & prefix[ ..] , peek ( newline ) . take ( ) ) ) )
1295
+ cut_err ( alt ( ( ( & prefix[ ..] ) . void ( ) , peek ( empty_line ) . void ( ) ) ) )
1233
1296
. context ( cx ( ) . lbl ( "matching multiline raw string prefix" ) ) ,
1234
1297
alt ( (
1235
- newline . take ( ) . map ( |_| " \n " . to_string ( ) ) ,
1298
+ empty_line . map ( |s| s . to_string ( ) ) ,
1236
1299
repeat_till (
1237
1300
0 ..,
1238
1301
( not ( newline) , not ( ( "\" \" \" " , & hashes[ ..] ) ) , any)
@@ -1254,7 +1317,7 @@ fn raw_string(input: &mut Input<'_>) -> PResult<KdlValue> {
1254
1317
. map ( |( s, _) : ( Vec < String > , ( _ , _ , _ ) ) | {
1255
1318
let mut s = s. join ( "" ) ;
1256
1319
// Slice off the `\n` at the end of the last line.
1257
- s. truncate ( s. len ( ) - 1 ) ;
1320
+ s. truncate ( s. len ( ) . saturating_sub ( 1 ) ) ;
1258
1321
s
1259
1322
} )
1260
1323
. parse_next ( input) ?
@@ -1311,7 +1374,7 @@ mod string_tests {
1311
1374
}
1312
1375
1313
1376
#[ test]
1314
- fn quoted_string ( ) {
1377
+ fn single_line_quoted_string ( ) {
1315
1378
assert_eq ! (
1316
1379
string. parse( new_input( "\" foo\" " ) ) . unwrap( ) ,
1317
1380
Some ( KdlValue :: String ( "foo" . into( ) ) )
@@ -1363,6 +1426,14 @@ mod string_tests {
1363
1426
Some ( KdlValue :: String ( "\n string\t " . into( ) ) ) ,
1364
1427
"Empty line without any indentation"
1365
1428
) ;
1429
+ assert_eq ! (
1430
+ string
1431
+ . parse( new_input( "\" \" \" \n \\ \n \n \" \" \" " ) )
1432
+ . unwrap( ) ,
1433
+ Some ( KdlValue :: String ( "" . into( ) ) ) ,
1434
+ "Escaped whitespace with proper prefix"
1435
+ ) ;
1436
+
1366
1437
assert ! ( string
1367
1438
. parse( new_input( "\" \" \" \n foo\n bar\n baz\n \" \" \" " ) )
1368
1439
. is_err( ) ) ;
@@ -1491,9 +1562,9 @@ fn disallowed_unicode(input: &mut Input<'_>) -> PResult<()> {
1491
1562
/// `escline := '\\' ws* (single-line-comment | newline | eof)`
1492
1563
///
/// Runs four parsers in sequence and fails as soon as any of them fails: the
/// leading escape literal, `wss`, one of `single_line_comment` / `newline` /
/// `eof`, and finally `wss` again. Nothing is returned on success.
fn escline ( input : & mut Input < ' _ > ) -> PResult < ( ) > {
1493
1564
"\\ " . parse_next ( input) ?;
1494
// NOTE(review): `wss` replaces the removed `repeat(0.., ws)` lines; presumably
// it is the zero-or-more form of `ws` — confirm against its definition.
- repeat ( 0 .. , ws ) . map ( |_ : ( ) | ( ) ) . parse_next ( input) ?;
1565
+ wss . parse_next ( input) ?;
1495
1566
// The escape must be closed by a comment, a newline, or end of input.
alt ( ( single_line_comment, newline, eof. void ( ) ) ) . parse_next ( input) ?;
1496
// Trailing whitespace after the terminator is consumed as well, which the
// grammar line in the doc comment does not spell out.
- repeat ( 0 .. , ws ) . map ( |_ : ( ) | ( ) ) . parse_next ( input)
1567
+ wss . parse_next ( input)
1497
1568
}
1498
1569
1499
1570
#[ cfg( test) ]
@@ -1596,9 +1667,12 @@ fn multi_line_comment_test() {
1596
1667
. is_ok( ) ) ;
1597
1668
}
1598
1669
1599
- /// slashdash := '/-' line-space*
1670
+ /// slashdash := '/-' (node-space | line-space) *
1600
1671
///
/// Matches the `/-` marker and then consumes zero or more following space
/// items; the matched text is discarded via `void()`.
fn slashdash ( input : & mut Input < ' _ > ) -> PResult < ( ) > {
1601
// The removed (`-`) line accepted only `line_space` after the marker; the
// added (`+`) lines accept either `node_space` or `line_space`, trying
// `node_space` first.
- ( "/-" , repeat ( 0 .., line_space) . map ( |( ) | ( ) ) )
1672
+ (
1673
+ "/-" ,
1674
+ repeat ( 0 .., alt ( ( node_space, line_space) ) ) . map ( |( ) | ( ) ) ,
1675
+ )
1602
1676
. void ( )
1603
1677
. parse_next ( input)
1604
1678
}
0 commit comments