@@ -882,9 +882,8 @@ fn node_space1(input: &mut Input<'_>) -> PResult<()> {
882882 repeat ( 1 .., node_space) . parse_next ( input)
883883}
884884
885- /// ` string := identifier-string | quoted-string | raw-string`
885+ /// string := identifier-string | quoted-string | raw-string
886886pub ( crate ) fn string ( input : & mut Input < ' _ > ) -> PResult < Option < KdlValue > > {
887- // TODO: shouldn't put the `resume_after_cut`s here, because they mess with context from higher levels.
888887 trace (
889888 "string" ,
890889 alt ( (
@@ -1018,32 +1017,65 @@ fn equals_sign(input: &mut Input<'_>) -> PResult<()> {
10181017}
10191018
10201019/// ```text
1021- /// quoted-string := '"' single-line-string-body '"' | '"""' newline multi-line-string-body newline unicode-space*) '"""'
1020+ /// quoted-string := '"' single-line-string-body '"' | '"""' newline multi-line-string-body newline (unicode-space | ('\' (unicode-space | newline)+))* '"""'
10221021/// single-line-string-body := (string-character - newline)*
1023- /// multi-line-string-body := string-character*
1022+ /// multi-line-string-body := (('"' | '""')? string-character) *
10241023/// ```
1025- fn quoted_string < ' s > ( input : & mut Input < ' s > ) -> PResult < KdlValue > {
1026- let quotes = alt ( ( ( "\" \" \" " , newline) . take ( ) , "\" " ) ) . parse_next ( input) ?;
1024+ fn quoted_string ( input : & mut Input < ' _ > ) -> PResult < KdlValue > {
1025+ let quotes =
1026+ alt ( (
1027+ (
1028+ "\" \" \" " ,
1029+ cut_err ( newline) . context ( cx ( ) . lbl ( "multi-line string newline" ) . msg (
1030+ "Multi-line string opening quotes must be immediately followed by a newline" ,
1031+ ) ) ,
1032+ )
1033+ . take ( ) ,
1034+ "\" " ,
1035+ ) )
1036+ . parse_next ( input) ?;
10271037 let is_multiline = quotes. len ( ) > 1 ;
10281038 let ml_prefix: Option < String > = if is_multiline {
10291039 Some (
1030- peek ( preceded (
1040+ cut_err ( peek ( preceded (
10311041 repeat_till (
10321042 0 ..,
10331043 (
1034- repeat ( 0 .., ( not ( newline) , opt ( ws_escape) , string_char) ) . map ( |( ) | ( ) ) ,
1044+ repeat (
1045+ 0 ..,
1046+ (
1047+ not ( newline) ,
1048+ alt ( (
1049+ ws_escape. void ( ) ,
1050+ trace (
1051+ "valid string body char(s)" ,
1052+ alt ( (
1053+ ( '\"' , not ( "\" \" " ) ) . void ( ) ,
1054+ ( '\"' , not ( "\" " ) ) . void ( ) ,
1055+ string_char. void ( ) ,
1056+ ) ) ,
1057+ )
1058+ . void ( ) ,
1059+ ) ) ,
1060+ ) ,
1061+ )
1062+ . map ( |( ) | ( ) ) ,
10351063 newline,
10361064 ) ,
10371065 peek ( terminated (
1038- repeat ( 0 .., unicode_space) . map ( |( ) | ( ) ) ,
1066+ repeat ( 0 .., alt ( ( ws_escape , unicode_space) ) ) . map ( |( ) | ( ) ) ,
10391067 "\" \" \" " ,
10401068 ) ) ,
10411069 )
10421070 . map ( |( ( ) , ( ) ) | ( ) ) ,
1043- terminated ( repeat ( 0 .., unicode_space) . map ( |( ) | ( ) ) . take ( ) , "\" \" \" " ) ,
1044- ) )
1045- . parse_next ( input) ?
1046- . to_string ( ) ,
1071+ terminated (
1072+ repeat ( 0 .., alt ( ( ws_escape. map ( |_| "" ) , unicode_space. take ( ) ) ) )
1073+ . map ( |s : String | s) ,
1074+ "\" \" \" " ,
1075+ ) ,
1076+ ) ) )
1077+ . context ( cx ( ) . lbl ( "multi-line string" ) )
1078+ . parse_next ( input) ?,
10471079 )
10481080 } else {
10491081 None
@@ -1052,30 +1084,40 @@ fn quoted_string<'s>(input: &mut Input<'s>) -> PResult<KdlValue> {
10521084 let parser = repeat_till (
10531085 0 ..,
10541086 (
1055- cut_err ( alt ( ( & prefix[ ..] , peek ( newline ) . take ( ) ) ) )
1087+ cut_err ( alt ( ( ( & prefix[ ..] ) . void ( ) , peek ( empty_line ) . void ( ) ) ) )
10561088 . context ( cx ( ) . msg ( "matching multiline string prefix" ) . lbl ( "bad prefix" ) . hlp ( "Multi-line string bodies must be prefixed by the exact same whitespace as the leading whitespace before the closing '\" \" \" '" ) ) ,
10571089 alt ( (
1058- newline . take ( ) . map ( |_| " \n " . to_string ( ) ) ,
1090+ empty_line . map ( |s| s . to_string ( ) ) ,
10591091 repeat_till (
10601092 0 ..,
1061- ( not ( newline) , opt ( ws_escape) , string_char) . map ( |( _, _, s) | s) ,
1093+ (
1094+ not ( newline) ,
1095+ alt ( (
1096+ ws_escape. map ( |_| None ) ,
1097+ alt ( (
1098+ ( '\"' , not ( "\" \" " ) ) . map ( |( c, ( ) ) | Some ( c) ) ,
1099+ ( '\"' , not ( "\" " ) ) . map ( |( c, ( ) ) | Some ( c) ) ,
1100+ string_char. map ( Some ) ,
1101+ ) )
1102+ ) )
1103+ ) . map ( |( _, c) | c) ,
10621104 newline,
10631105 )
10641106 // multiline string literal newlines are normalized to `\n`
1065- . map ( |( s , _) : ( String , _ ) | format ! ( "{s} \n " ) ) ,
1107+ . map ( |( cs , _) : ( Vec < Option < char > > , _ ) | cs . into_iter ( ) . flatten ( ) . chain ( vec ! [ '\n' ] ) . collect :: < String > ( ) ) ,
10661108 ) ) ,
10671109 )
10681110 . map ( |( _, s) | s) ,
10691111 (
10701112 & prefix[ ..] ,
1071- repeat ( 0 .., unicode_space ) . map ( |( ) | ( ) ) . take ( ) ,
1113+ repeat ( 0 .., ws_escape . void ( ) ) . map ( |( ) | ( ) ) ,
10721114 peek ( "\" \" \" " ) ,
10731115 ) ,
10741116 )
10751117 . map ( |( s, _) : ( Vec < String > , ( _ , _ , _ ) ) | {
10761118 let mut s = s. join ( "" ) ;
10771119 // Slice off the `\n` at the end of the last line.
1078- s. truncate ( s. len ( ) - 1 ) ;
1120+ s. truncate ( s. len ( ) . saturating_sub ( 1 ) ) ;
10791121 s
10801122 } )
10811123 . context ( cx ( ) . lbl ( "multi-line quoted string" ) ) ;
@@ -1090,13 +1132,14 @@ fn quoted_string<'s>(input: &mut Input<'s>) -> PResult<KdlValue> {
10901132 . hlp ( "You can make a string multi-line by wrapping it in '\" \" \" ', with a newline immediately after the opening quotes." ) ,
10911133 ) ,
10921134 ) ,
1093- opt ( ws_escape) ,
1094- string_char,
1095- )
1096- . map ( |( _, _, s) | s) ,
1097- ( repeat ( 0 .., unicode_space) . map ( |( ) | ( ) ) . take ( ) , peek ( "\" " ) ) ,
1135+ alt ( (
1136+ ws_escape. map ( |_| None ) ,
1137+ string_char. map ( Some ) ,
1138+ ) )
1139+ ) . map ( |( _, c) | c) ,
1140+ peek ( "\" " )
10981141 )
1099- . map ( |( s , ( end , _ ) ) : ( String , ( & ' s str , _ ) ) | format ! ( "{s}{end}" ) )
1142+ . map ( |( cs , _ ) : ( Vec < Option < char > > , _ ) | cs . into_iter ( ) . flatten ( ) . collect :: < String > ( ) )
11001143 . context ( cx ( ) . lbl ( "quoted string" ) ) ;
11011144 cut_err ( parser) . parse_next ( input) ?
11021145 } ;
@@ -1112,8 +1155,19 @@ fn quoted_string<'s>(input: &mut Input<'s>) -> PResult<KdlValue> {
11121155 Ok ( KdlValue :: String ( body) )
11131156}
11141157
1158+ fn empty_line ( input : & mut Input < ' _ > ) -> PResult < & ' static str > {
1159+ repeat ( 0 .., alt ( ( ws_escape. void ( ) , unicode_space. void ( ) ) ) )
1160+ . map ( |( ) | ( ) )
1161+ . parse_next ( input) ?;
1162+ newline. parse_next ( input) ?;
1163+ Ok ( "\n " )
1164+ }
1165+
11151166/// Like badval, but is able to slurp up invalid raw strings, which contain whitespace.
11161167fn quoted_string_badval ( input : & mut Input < ' _ > ) -> PResult < ( ) > {
1168+ // TODO(@zkat): this should have different behavior based on whether we're
1169+ // resuming a single or multi-line string. Right now, multi-liners end up
1170+ // with silly errors.
11171171 (
11181172 repeat_till (
11191173 0 ..,
@@ -1135,19 +1189,25 @@ fn quoted_string_terminator(input: &mut Input<'_>) -> PResult<()> {
11351189/// ```
11361190fn string_char ( input : & mut Input < ' _ > ) -> PResult < char > {
11371191 alt ( (
1138- escaped_char,
1139- ( not ( disallowed_unicode) , none_of ( [ '\\' , '"' ] ) ) . map ( |( _, c) | c) ,
1192+ trace ( "escaped char" , escaped_char) ,
1193+ trace (
1194+ "regular string char" ,
1195+ ( not ( disallowed_unicode) , none_of ( [ '\\' , '"' ] ) ) . map ( |( _, c) | c) ,
1196+ ) ,
11401197 ) )
11411198 . parse_next ( input)
11421199}
11431200
11441201fn ws_escape ( input : & mut Input < ' _ > ) -> PResult < ( ) > {
1145- (
1146- "\\ " ,
1147- repeat ( 1 .., alt ( ( unicode_space, newline) ) ) . map ( |( ) | ( ) ) ,
1202+ trace (
1203+ "ws_escape" ,
1204+ (
1205+ "\\ " ,
1206+ repeat ( 1 .., alt ( ( unicode_space, newline) ) ) . map ( |( ) | ( ) ) ,
1207+ ) ,
11481208 )
1149- . void ( )
1150- . parse_next ( input)
1209+ . void ( )
1210+ . parse_next ( input)
11511211}
11521212
11531213/// ```text
@@ -1182,10 +1242,13 @@ fn escaped_char(input: &mut Input<'_>) -> PResult<char> {
11821242 . parse_next ( input)
11831243}
11841244
1185- /// `raw-string := '#' raw-string-quotes '#' | '#' raw-string '#'`
1186- /// `raw-string-quotes := '"' single-line-raw-string-body '"' | '"""' newline multi-line-raw-string-body newline unicode-space*) '"""'`
1187- /// `single-line-raw-string-body := (unicode - newline - disallowed-literal-code-points)*`
1188- /// `multi-line-raw-string-body := (unicode - disallowed-literal-code-points)`
1245+ /// ```text
1246+ /// raw-string := '#' raw-string-quotes '#' | '#' raw-string '#'
1247+ /// raw-string-quotes := '"' single-line-raw-string-body '"' | '"""' newline multi-line-raw-string-body '"""'
1248+ /// single-line-raw-string-body := '' | (single-line-raw-string-char - '"') single-line-raw-string-char*? | '"' (single-line-raw-string-char - '"') single-line-raw-string-char*?
1249+ /// single-line-raw-string-char := unicode - newline - disallowed-literal-code-points
1250+ /// multi-line-raw-string-body := (unicode - disallowed-literal-code-points)*?
1251+ /// ```
11891252fn raw_string ( input : & mut Input < ' _ > ) -> PResult < KdlValue > {
11901253 let hashes: String = repeat ( 1 .., "#" ) . parse_next ( input) ?;
11911254 let quotes = alt ( ( ( "\" \" \" " , newline) . take ( ) , "\" " ) ) . parse_next ( input) ?;
@@ -1229,10 +1292,10 @@ fn raw_string(input: &mut Input<'_>) -> PResult<KdlValue> {
12291292 repeat_till (
12301293 0 ..,
12311294 (
1232- cut_err ( alt ( ( & prefix[ ..] , peek ( newline ) . take ( ) ) ) )
1295+ cut_err ( alt ( ( ( & prefix[ ..] ) . void ( ) , peek ( empty_line ) . void ( ) ) ) )
12331296 . context ( cx ( ) . lbl ( "matching multiline raw string prefix" ) ) ,
12341297 alt ( (
1235- newline . take ( ) . map ( |_| " \n " . to_string ( ) ) ,
1298+ empty_line . map ( |s| s . to_string ( ) ) ,
12361299 repeat_till (
12371300 0 ..,
12381301 ( not ( newline) , not ( ( "\" \" \" " , & hashes[ ..] ) ) , any)
@@ -1254,7 +1317,7 @@ fn raw_string(input: &mut Input<'_>) -> PResult<KdlValue> {
12541317 . map ( |( s, _) : ( Vec < String > , ( _ , _ , _ ) ) | {
12551318 let mut s = s. join ( "" ) ;
12561319 // Slice off the `\n` at the end of the last line.
1257- s. truncate ( s. len ( ) - 1 ) ;
1320+ s. truncate ( s. len ( ) . saturating_sub ( 1 ) ) ;
12581321 s
12591322 } )
12601323 . parse_next ( input) ?
@@ -1311,7 +1374,7 @@ mod string_tests {
13111374 }
13121375
13131376 #[ test]
1314- fn quoted_string ( ) {
1377+ fn single_line_quoted_string ( ) {
13151378 assert_eq ! (
13161379 string. parse( new_input( "\" foo\" " ) ) . unwrap( ) ,
13171380 Some ( KdlValue :: String ( "foo" . into( ) ) )
@@ -1363,6 +1426,14 @@ mod string_tests {
13631426 Some ( KdlValue :: String ( "\n string\t " . into( ) ) ) ,
13641427 "Empty line without any indentation"
13651428 ) ;
1429+ assert_eq ! (
1430+ string
1431+ . parse( new_input( "\" \" \" \n \\ \n \n \" \" \" " ) )
1432+ . unwrap( ) ,
1433+ Some ( KdlValue :: String ( "" . into( ) ) ) ,
1434+ "Escaped whitespace with proper prefix"
1435+ ) ;
1436+
13661437 assert ! ( string
13671438 . parse( new_input( "\" \" \" \n foo\n bar\n baz\n \" \" \" " ) )
13681439 . is_err( ) ) ;
@@ -1491,9 +1562,9 @@ fn disallowed_unicode(input: &mut Input<'_>) -> PResult<()> {
14911562/// `escline := '\\' ws* (single-line-comment | newline | eof)`
14921563fn escline ( input : & mut Input < ' _ > ) -> PResult < ( ) > {
14931564 "\\ " . parse_next ( input) ?;
1494- repeat ( 0 .. , ws ) . map ( |_ : ( ) | ( ) ) . parse_next ( input) ?;
1565+ wss . parse_next ( input) ?;
14951566 alt ( ( single_line_comment, newline, eof. void ( ) ) ) . parse_next ( input) ?;
1496- repeat ( 0 .. , ws ) . map ( |_ : ( ) | ( ) ) . parse_next ( input)
1567+ wss . parse_next ( input)
14971568}
14981569
14991570#[ cfg( test) ]
@@ -1596,9 +1667,12 @@ fn multi_line_comment_test() {
15961667 . is_ok( ) ) ;
15971668}
15981669
1599- /// slashdash := '/-' line-space*
1670+ /// slashdash := '/-' (node-space | line-space) *
16001671fn slashdash ( input : & mut Input < ' _ > ) -> PResult < ( ) > {
1601- ( "/-" , repeat ( 0 .., line_space) . map ( |( ) | ( ) ) )
1672+ (
1673+ "/-" ,
1674+ repeat ( 0 .., alt ( ( node_space, line_space) ) ) . map ( |( ) | ( ) ) ,
1675+ )
16021676 . void ( )
16031677 . parse_next ( input)
16041678}
0 commit comments