Skip to content

Commit a1919a0

Browse files
authored
feat(parser): Update parser to 2.0.0 (#100)
Fixes: #99
1 parent f31750a commit a1919a0

File tree

111 files changed

+223
-53
lines changed

Some content is hidden

Large commits have some content hidden by default. Use the search box below to find content that may be hidden.

111 files changed

+223
-53
lines changed

src/v2_parser.rs

Lines changed: 118 additions & 44 deletions
Original file line number | Diff line number | Diff line change
@@ -882,9 +882,8 @@ fn node_space1(input: &mut Input<'_>) -> PResult<()> {
882882
repeat(1.., node_space).parse_next(input)
883883
}
884884

885-
/// `string := identifier-string | quoted-string | raw-string`
885+
/// string := identifier-string | quoted-string | raw-string
886886
pub(crate) fn string(input: &mut Input<'_>) -> PResult<Option<KdlValue>> {
887-
// TODO: shouldn't put the `resume_after_cut`s here, because they mess with context from higher levels.
888887
trace(
889888
"string",
890889
alt((
@@ -1018,32 +1017,65 @@ fn equals_sign(input: &mut Input<'_>) -> PResult<()> {
10181017
}
10191018

10201019
/// ```text
1021-
/// quoted-string := '"' single-line-string-body '"' | '"""' newline multi-line-string-body newline unicode-space*) '"""'
1020+
/// quoted-string := '"' single-line-string-body '"' | '"""' newline multi-line-string-body newline (unicode-space | ('\' (unicode-space | newline)+)*) '"""'
10221021
/// single-line-string-body := (string-character - newline)*
1023-
/// multi-line-string-body := string-character*
1022+
/// multi-line-string-body := (('"' | '""')? string-character)*
10241023
/// ```
1025-
fn quoted_string<'s>(input: &mut Input<'s>) -> PResult<KdlValue> {
1026-
let quotes = alt((("\"\"\"", newline).take(), "\"")).parse_next(input)?;
1024+
fn quoted_string(input: &mut Input<'_>) -> PResult<KdlValue> {
1025+
let quotes =
1026+
alt((
1027+
(
1028+
"\"\"\"",
1029+
cut_err(newline).context(cx().lbl("multi-line string newline").msg(
1030+
"Multi-line string opening quotes must be immediately followed by a newline",
1031+
)),
1032+
)
1033+
.take(),
1034+
"\"",
1035+
))
1036+
.parse_next(input)?;
10271037
let is_multiline = quotes.len() > 1;
10281038
let ml_prefix: Option<String> = if is_multiline {
10291039
Some(
1030-
peek(preceded(
1040+
cut_err(peek(preceded(
10311041
repeat_till(
10321042
0..,
10331043
(
1034-
repeat(0.., (not(newline), opt(ws_escape), string_char)).map(|()| ()),
1044+
repeat(
1045+
0..,
1046+
(
1047+
not(newline),
1048+
alt((
1049+
ws_escape.void(),
1050+
trace(
1051+
"valid string body char(s)",
1052+
alt((
1053+
('\"', not("\"\"")).void(),
1054+
('\"', not("\"")).void(),
1055+
string_char.void(),
1056+
)),
1057+
)
1058+
.void(),
1059+
)),
1060+
),
1061+
)
1062+
.map(|()| ()),
10351063
newline,
10361064
),
10371065
peek(terminated(
1038-
repeat(0.., unicode_space).map(|()| ()),
1066+
repeat(0.., alt((ws_escape, unicode_space))).map(|()| ()),
10391067
"\"\"\"",
10401068
)),
10411069
)
10421070
.map(|((), ())| ()),
1043-
terminated(repeat(0.., unicode_space).map(|()| ()).take(), "\"\"\""),
1044-
))
1045-
.parse_next(input)?
1046-
.to_string(),
1071+
terminated(
1072+
repeat(0.., alt((ws_escape.map(|_| ""), unicode_space.take())))
1073+
.map(|s: String| s),
1074+
"\"\"\"",
1075+
),
1076+
)))
1077+
.context(cx().lbl("multi-line string"))
1078+
.parse_next(input)?,
10471079
)
10481080
} else {
10491081
None
@@ -1052,30 +1084,40 @@ fn quoted_string<'s>(input: &mut Input<'s>) -> PResult<KdlValue> {
10521084
let parser = repeat_till(
10531085
0..,
10541086
(
1055-
cut_err(alt((&prefix[..], peek(newline).take())))
1087+
cut_err(alt(((&prefix[..]).void(), peek(empty_line).void())))
10561088
.context(cx().msg("matching multiline string prefix").lbl("bad prefix").hlp("Multi-line string bodies must be prefixed by the exact same whitespace as the leading whitespace before the closing '\"\"\"'")),
10571089
alt((
1058-
newline.take().map(|_| "\n".to_string()),
1090+
empty_line.map(|s| s.to_string()),
10591091
repeat_till(
10601092
0..,
1061-
(not(newline), opt(ws_escape), string_char).map(|(_, _, s)| s),
1093+
(
1094+
not(newline),
1095+
alt((
1096+
ws_escape.map(|_| None),
1097+
alt((
1098+
('\"', not("\"\"")).map(|(c, ())| Some(c)),
1099+
('\"', not("\"")).map(|(c, ())| Some(c)),
1100+
string_char.map(Some),
1101+
))
1102+
))
1103+
).map(|(_, c)| c),
10621104
newline,
10631105
)
10641106
// multiline string literal newlines are normalized to `\n`
1065-
.map(|(s, _): (String, _)| format!("{s}\n")),
1107+
.map(|(cs, _): (Vec<Option<char>>, _)| cs.into_iter().flatten().chain(vec!['\n']).collect::<String>()),
10661108
)),
10671109
)
10681110
.map(|(_, s)| s),
10691111
(
10701112
&prefix[..],
1071-
repeat(0.., unicode_space).map(|()| ()).take(),
1113+
repeat(0.., ws_escape.void()).map(|()| ()),
10721114
peek("\"\"\""),
10731115
),
10741116
)
10751117
.map(|(s, _): (Vec<String>, (_, _, _))| {
10761118
let mut s = s.join("");
10771119
// Slice off the `\n` at the end of the last line.
1078-
s.truncate(s.len() - 1);
1120+
s.truncate(s.len().saturating_sub(1));
10791121
s
10801122
})
10811123
.context(cx().lbl("multi-line quoted string"));
@@ -1090,13 +1132,14 @@ fn quoted_string<'s>(input: &mut Input<'s>) -> PResult<KdlValue> {
10901132
.hlp("You can make a string multi-line by wrapping it in '\"\"\"', with a newline immediately after the opening quotes."),
10911133
),
10921134
),
1093-
opt(ws_escape),
1094-
string_char,
1095-
)
1096-
.map(|(_, _, s)| s),
1097-
(repeat(0.., unicode_space).map(|()| ()).take(), peek("\"")),
1135+
alt((
1136+
ws_escape.map(|_| None),
1137+
string_char.map(Some),
1138+
))
1139+
).map(|(_, c)| c),
1140+
peek("\"")
10981141
)
1099-
.map(|(s, (end, _)): (String, (&'s str, _))| format!("{s}{end}"))
1142+
.map(|(cs, _): (Vec<Option<char>>, _)| cs.into_iter().flatten().collect::<String>())
11001143
.context(cx().lbl("quoted string"));
11011144
cut_err(parser).parse_next(input)?
11021145
};
@@ -1112,8 +1155,19 @@ fn quoted_string<'s>(input: &mut Input<'s>) -> PResult<KdlValue> {
11121155
Ok(KdlValue::String(body))
11131156
}
11141157

1158+
fn empty_line(input: &mut Input<'_>) -> PResult<&'static str> {
1159+
repeat(0.., alt((ws_escape.void(), unicode_space.void())))
1160+
.map(|()| ())
1161+
.parse_next(input)?;
1162+
newline.parse_next(input)?;
1163+
Ok("\n")
1164+
}
1165+
11151166
/// Like badval, but is able to slurp up invalid raw strings, which contain whitespace.
11161167
fn quoted_string_badval(input: &mut Input<'_>) -> PResult<()> {
1168+
// TODO(@zkat): this should have different behavior based on whether we're
1169+
// resuming a single or multi-line string. Right now, multi-liners end up
1170+
// with silly errors.
11171171
(
11181172
repeat_till(
11191173
0..,
@@ -1135,19 +1189,25 @@ fn quoted_string_terminator(input: &mut Input<'_>) -> PResult<()> {
11351189
/// ```
11361190
fn string_char(input: &mut Input<'_>) -> PResult<char> {
11371191
alt((
1138-
escaped_char,
1139-
(not(disallowed_unicode), none_of(['\\', '"'])).map(|(_, c)| c),
1192+
trace("escaped char", escaped_char),
1193+
trace(
1194+
"regular string char",
1195+
(not(disallowed_unicode), none_of(['\\', '"'])).map(|(_, c)| c),
1196+
),
11401197
))
11411198
.parse_next(input)
11421199
}
11431200

11441201
fn ws_escape(input: &mut Input<'_>) -> PResult<()> {
1145-
(
1146-
"\\",
1147-
repeat(1.., alt((unicode_space, newline))).map(|()| ()),
1202+
trace(
1203+
"ws_escape",
1204+
(
1205+
"\\",
1206+
repeat(1.., alt((unicode_space, newline))).map(|()| ()),
1207+
),
11481208
)
1149-
.void()
1150-
.parse_next(input)
1209+
.void()
1210+
.parse_next(input)
11511211
}
11521212

11531213
/// ```text
@@ -1182,10 +1242,13 @@ fn escaped_char(input: &mut Input<'_>) -> PResult<char> {
11821242
.parse_next(input)
11831243
}
11841244

1185-
/// `raw-string := '#' raw-string-quotes '#' | '#' raw-string '#'`
1186-
/// `raw-string-quotes := '"' single-line-raw-string-body '"' | '"""' newline multi-line-raw-string-body newline unicode-space*) '"""'`
1187-
/// `single-line-raw-string-body := (unicode - newline - disallowed-literal-code-points)*`
1188-
/// `multi-line-raw-string-body := (unicode - disallowed-literal-code-points)`
1245+
/// ```text
1246+
/// raw-string := '#' raw-string-quotes '#' | '#' raw-string '#'
1247+
/// raw-string-quotes := '"' single-line-raw-string-body '"' | '"""' newline multi-line-raw-string-body '"""'
1248+
/// single-line-raw-string-body := '' | (single-line-raw-string-char - '"') single-line-raw-string-char*? | '"' (single-line-raw-string-char - '"') single-line-raw-string-char*?
1249+
/// single-line-raw-string-char := unicode - newline - disallowed-literal-code-points
1250+
/// multi-line-raw-string-body := (unicode - disallowed-literal-code-points)*?
1251+
/// ```
11891252
fn raw_string(input: &mut Input<'_>) -> PResult<KdlValue> {
11901253
let hashes: String = repeat(1.., "#").parse_next(input)?;
11911254
let quotes = alt((("\"\"\"", newline).take(), "\"")).parse_next(input)?;
@@ -1229,10 +1292,10 @@ fn raw_string(input: &mut Input<'_>) -> PResult<KdlValue> {
12291292
repeat_till(
12301293
0..,
12311294
(
1232-
cut_err(alt((&prefix[..], peek(newline).take())))
1295+
cut_err(alt(((&prefix[..]).void(), peek(empty_line).void())))
12331296
.context(cx().lbl("matching multiline raw string prefix")),
12341297
alt((
1235-
newline.take().map(|_| "\n".to_string()),
1298+
empty_line.map(|s| s.to_string()),
12361299
repeat_till(
12371300
0..,
12381301
(not(newline), not(("\"\"\"", &hashes[..])), any)
@@ -1254,7 +1317,7 @@ fn raw_string(input: &mut Input<'_>) -> PResult<KdlValue> {
12541317
.map(|(s, _): (Vec<String>, (_, _, _))| {
12551318
let mut s = s.join("");
12561319
// Slice off the `\n` at the end of the last line.
1257-
s.truncate(s.len() - 1);
1320+
s.truncate(s.len().saturating_sub(1));
12581321
s
12591322
})
12601323
.parse_next(input)?
@@ -1311,7 +1374,7 @@ mod string_tests {
13111374
}
13121375

13131376
#[test]
1314-
fn quoted_string() {
1377+
fn single_line_quoted_string() {
13151378
assert_eq!(
13161379
string.parse(new_input("\"foo\"")).unwrap(),
13171380
Some(KdlValue::String("foo".into()))
@@ -1363,6 +1426,14 @@ mod string_tests {
13631426
Some(KdlValue::String("\nstring\t".into())),
13641427
"Empty line without any indentation"
13651428
);
1429+
assert_eq!(
1430+
string
1431+
.parse(new_input("\"\"\"\n   \\\n   \n   \"\"\""))
1432+
.unwrap(),
1433+
Some(KdlValue::String("".into())),
1434+
"Escaped whitespace with proper prefix"
1435+
);
1436+
13661437
assert!(string
13671438
.parse(new_input("\"\"\"\nfoo\n bar\n baz\n \"\"\""))
13681439
.is_err());
@@ -1491,9 +1562,9 @@ fn disallowed_unicode(input: &mut Input<'_>) -> PResult<()> {
14911562
/// `escline := '\\' ws* (single-line-comment | newline | eof)`
14921563
fn escline(input: &mut Input<'_>) -> PResult<()> {
14931564
"\\".parse_next(input)?;
1494-
repeat(0.., ws).map(|_: ()| ()).parse_next(input)?;
1565+
wss.parse_next(input)?;
14951566
alt((single_line_comment, newline, eof.void())).parse_next(input)?;
1496-
repeat(0.., ws).map(|_: ()| ()).parse_next(input)
1567+
wss.parse_next(input)
14971568
}
14981569

14991570
#[cfg(test)]
@@ -1596,9 +1667,12 @@ fn multi_line_comment_test() {
15961667
.is_ok());
15971668
}
15981669

1599-
/// slashdash := '/-' line-space*
1670+
/// slashdash := '/-' (node-space | line-space)*
16001671
fn slashdash(input: &mut Input<'_>) -> PResult<()> {
1601-
("/-", repeat(0.., line_space).map(|()| ()))
1672+
(
1673+
"/-",
1674+
repeat(0.., alt((node_space, line_space))).map(|()| ()),
1675+
)
16021676
.void()
16031677
.parse_next(input)
16041678
}
Lines changed: 1 addition & 0 deletions
Original file line number | Diff line number | Diff line change
@@ -0,0 +1 @@
1+
node "\"\"\"triple-quote\"\"\"\n##\"too few quotes\"##\n#\"\"\"too few #\"\"\"#"
Lines changed: 1 addition & 0 deletions
Original file line number | Diff line number | Diff line change
@@ -0,0 +1 @@
1+
node "this string contains \"quotes\", twice\"\""
Lines changed: 1 addition & 0 deletions
Original file line number | Diff line number | Diff line change
@@ -0,0 +1 @@
1+
node "foo bar\nbaz"
Lines changed: 1 addition & 0 deletions
Original file line number | Diff line number | Diff line change
@@ -0,0 +1 @@
1+
node " foo bar\n baz"
Lines changed: 1 addition & 0 deletions
Original file line number | Diff line number | Diff line change
@@ -0,0 +1 @@
1+
node "" "" "" "\n\n " "\n"

tests/test_cases/expected_kdl/raw_string_just_quote.kdl

Lines changed: 0 additions & 1 deletion
This file was deleted.
Lines changed: 1 addition & 0 deletions
Original file line number | Diff line number | Diff line change
@@ -0,0 +1 @@
1+
node arg2
Lines changed: 1 addition & 0 deletions
Original file line number | Diff line number | Diff line change
@@ -0,0 +1 @@
1+
node arg1
Lines changed: 1 addition & 0 deletions
Original file line number | Diff line number | Diff line change
@@ -0,0 +1 @@
1+
node2

0 commit comments

Comments
 (0)