Skip to content

Commit 21f83af

Browse files
benlubas and awegsche authored
feat: parse_tree (#13)
* feat: stage 4
* fix: test failure
* fix: single line verbatim ranged tags due to escaped newlines
* fix: accept test snapshots
* fix: convert carryover tag contents

---------

Co-authored-by: awegsch <[email protected]>
1 parent 94d9e33 commit 21f83af

File tree

11 files changed

+933
-12
lines changed

11 files changed

+933
-12
lines changed

proptest-regressions/lib.txt

Lines changed: 4 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -7,3 +7,7 @@
77
cc fb9b5df4fe46fe331cc3aa40bba6501c1c603084688fd02dda6d1c73106c1324 # shrinks to tag_name = "A", parameter = "A", multi_parameter = "\u{b}"
88
cc 28afae9872324ba0632a8023219e32939580363ce8b99752dc19fae0ac5b63d1 # shrinks to paragraph_content = " "
99
cc 5b31c9987c98fc0e4faa50b782e5952e0948d2c2a60dd29081c8f54c75b4b52c # shrinks to tag_name = "ø", parameter = "a", multi_parameter = " "
10+
cc 67cb989806791583bc835dd766e8f7ef294a1a634f6857cd8e6dd16a49a09246 # shrinks to tag_name = "\u{1d165}", parameter = "<*\u{2060}>", multi_parameter = "a", content = ""
11+
cc cacc392d5a052fbd56e86b2ae08c6380a7a191847001424d938930b94e0f449f # shrinks to tag_type = "@", tag_name = "a", parameter = "a", multi_parameter = "<*0>", content = "\u{2060}"
12+
cc 15209ad4b4a04639c42c6f2d9629decdcd554a46a3638a63a8a04f9822a3d33c # shrinks to tag_name = "0", parameter = "a", multi_parameter = "\\", content = "\\"
13+
cc 98c6844a3274a61cba1c4be4bf931c3c136b24dbe9c75187f53e9e6a45a1508d # shrinks to tag_type = "@", tag_name = "𖩠", parameter = "!", multi_parameter = "\t", content = "a\\"

src/error.rs

Lines changed: 8 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -1,13 +1,14 @@
11
use chumsky::error::Simple;
22

3-
use crate::{stage_1::NorgToken, stage_2::NorgBlock};
3+
use crate::{stage_1::NorgToken, stage_2::NorgBlock, NorgASTFlat};
44

55
/// Represents errors that can occur during the parsing process across different stages.
66
#[derive(Debug)]
77
pub enum NorgParseError {
88
Stage1(Vec<Simple<char>>),
99
Stage2(Vec<Simple<NorgToken>>),
1010
Stage3(Vec<Simple<NorgBlock>>),
11+
Stage4(Vec<Simple<NorgASTFlat>>),
1112
}
1213

1314
impl From<Vec<Simple<char>>> for NorgParseError {
@@ -27,3 +28,9 @@ impl From<Vec<Simple<NorgBlock>>> for NorgParseError {
2728
NorgParseError::Stage3(error)
2829
}
2930
}
31+
32+
/// Converts a list of `Simple<NorgASTFlat>` errors into a [`NorgParseError`]
/// by wrapping it, unchanged, in the `Stage4` variant.
impl From<Vec<Simple<NorgASTFlat>>> for NorgParseError {
33+
fn from(error: Vec<Simple<NorgASTFlat>>) -> Self {
34+
NorgParseError::Stage4(error)
35+
}
36+
}

src/lib.rs

Lines changed: 138 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -1,16 +1,19 @@
11
use chumsky::Parser as _;
22
use error::NorgParseError;
33

4-
use crate::stage_1::stage_1;
4+
pub use crate::stage_1::stage_1;
55
use crate::stage_2::stage_2;
6+
use crate::stage_4::stage_4;
67

78
pub use crate::stage_2::ParagraphSegmentToken;
89
pub use crate::stage_3::*;
10+
pub use crate::stage_4::NorgAST;
911

1012
mod error;
1113
mod stage_1;
1214
mod stage_2;
1315
mod stage_3;
16+
mod stage_4;
1417

1518
/// Parses the given input string through multiple stages to produce a flattened abstract syntax tree (AST).
1619
///
@@ -26,13 +29,19 @@ pub fn parse(input: &str) -> Result<Vec<NorgASTFlat>, NorgParseError> {
2629
Ok(stage_3().parse(stage_2().parse(stage_1().parse(input)?)?)?)
2730
}
2831

32+
/// Parses the given input string into a list of [`NorgAST`] nodes.
///
/// The input is run through `stage_1`, `stage_2` and `stage_3` in sequence,
/// exactly as [`parse`] does, and the stage 3 output is then handed to
/// `stage_4`, whose return value is wrapped in `Ok`.
///
/// # Errors
///
/// Returns a [`NorgParseError`] if stage 1, stage 2 or stage 3 fails; each
/// stage's error is propagated with `?`. The `stage_4` call itself is not
/// followed by `?`, so no error is produced from it here.
pub fn parse_tree(input: &str) -> Result<Vec<NorgAST>, NorgParseError> {
33+
Ok(stage_4(
34+
stage_3().parse(stage_2().parse(stage_1().parse(input)?)?)?,
35+
))
36+
}
37+
2938
#[cfg(test)]
3039
mod tests {
3140
use insta::assert_yaml_snapshot;
3241
use itertools::Itertools;
3342
use proptest::{prop_oneof, proptest};
3443

35-
use crate::parse;
44+
use crate::{parse, parse_tree};
3645

3746
const TAG_NAME_REGEX: &str = r"[\w_\-\.\d]+";
3847
const TAG_PARAMETER_REGEX: &str = r"[^\s]+";
@@ -78,6 +87,90 @@ mod tests {
7887
assert_yaml_snapshot!(examples);
7988
}
8089

90+
#[test]
91+
fn headings_tree() {
92+
let headings_tree_examples: Vec<_> = [
93+
"
94+
* Heading
95+
** Another heading
96+
",
97+
"
98+
* Heading
99+
** Subheading
100+
content
101+
* Back to regular heading
102+
",
103+
]
104+
.into_iter()
105+
.map(|example| example.to_string() + "\n")
106+
.map(|str| parse_tree(&str))
107+
.try_collect()
108+
.unwrap();
109+
assert_yaml_snapshot!(headings_tree_examples);
110+
}
111+
112+
#[test]
113+
fn delimiting_mods_tree() {
114+
let examples: Vec<_> = [
115+
"* One
116+
content
117+
---
118+
dedented",
119+
"* One
120+
** Two
121+
===
122+
none",
123+
"** Two
124+
two
125+
___
126+
two",
127+
"- list
128+
___
129+
no list",
130+
"* One
131+
one
132+
** Two
133+
two
134+
*** Three
135+
three
136+
---
137+
two
138+
---
139+
one
140+
---
141+
none",
142+
]
143+
.into_iter()
144+
.map(|example| example.to_string() + "\n")
145+
.map(|str| parse_tree(&str))
146+
.try_collect()
147+
.unwrap();
148+
assert_yaml_snapshot!(examples);
149+
}
150+
151+
#[test]
152+
fn lists_tree() {
153+
let examples: Vec<_> = [
154+
"- base",
155+
"- one
156+
-- two",
157+
"- one
158+
-- two
159+
with content
160+
-- two (2)
161+
--- three
162+
- one",
163+
"-- two
164+
- one",
165+
]
166+
.into_iter()
167+
.map(|example| example.to_string() + "\n")
168+
.map(|str| parse_tree(&str))
169+
.try_collect()
170+
.unwrap();
171+
assert_yaml_snapshot!(examples);
172+
}
173+
81174
#[test]
82175
fn lists() {
83176
let examples: Vec<_> = [
@@ -377,6 +470,49 @@ mod tests {
377470
assert_yaml_snapshot!(examples);
378471
}
379472

473+
#[test]
474+
fn carryover_tags_tree() {
475+
let examples: Vec<_> = [
476+
"
477+
#id 123
478+
* tree
479+
** nested
480+
",
481+
"
482+
* tree
483+
#id there
484+
** nested
485+
---
486+
part of tree
487+
",
488+
"
489+
#name main
490+
-- two
491+
---- four
492+
#id 3
493+
--- three
494+
",
495+
"
496+
#comment
497+
multi-line
498+
comments
499+
---
500+
out
501+
",
502+
"
503+
#id 123
504+
#comment
505+
comment with id
506+
",
507+
]
508+
.into_iter()
509+
.map(|example| example.to_string() + "\n")
510+
.map(|str| parse_tree(&str))
511+
.try_collect()
512+
.unwrap();
513+
assert_yaml_snapshot!(examples);
514+
}
515+
380516
proptest! {
381517
#[test]
382518
fn carryover_tags_proptests(tag_name in TAG_NAME_REGEX, parameter in TAG_PARAMETER_REGEX, multi_parameter in TAG_MULTI_PARAMETER_REGEX) {
Lines changed: 138 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,138 @@
1+
---
2+
source: src/lib.rs
3+
expression: examples
4+
---
5+
- - CarryoverTag:
6+
tag_type: Macro
7+
name:
8+
- id
9+
parameters:
10+
- "123"
11+
next_object:
12+
Heading:
13+
level: 1
14+
title:
15+
- Token:
16+
Text: tree
17+
extensions: []
18+
content:
19+
- Heading:
20+
level: 2
21+
title:
22+
- Token:
23+
Text: nested
24+
extensions: []
25+
content: []
26+
- - Heading:
27+
level: 1
28+
title:
29+
- Token:
30+
Text: tree
31+
extensions: []
32+
content:
33+
- CarryoverTag:
34+
tag_type: Macro
35+
name:
36+
- id
37+
parameters:
38+
- there
39+
next_object:
40+
Heading:
41+
level: 2
42+
title:
43+
- Token:
44+
Text: nested
45+
extensions: []
46+
content: []
47+
- Paragraph:
48+
- Token:
49+
Text: part
50+
- Token: Whitespace
51+
- Token:
52+
Text: of
53+
- Token: Whitespace
54+
- Token:
55+
Text: tree
56+
- - CarryoverTag:
57+
tag_type: Macro
58+
name:
59+
- name
60+
parameters:
61+
- main
62+
next_object:
63+
NestableDetachedModifier:
64+
modifier_type: UnorderedList
65+
level: 2
66+
extensions: []
67+
text:
68+
Paragraph:
69+
- Token:
70+
Text: two
71+
content:
72+
- NestableDetachedModifier:
73+
modifier_type: UnorderedList
74+
level: 4
75+
extensions: []
76+
text:
77+
Paragraph:
78+
- Token:
79+
Text: four
80+
content: []
81+
- CarryoverTag:
82+
tag_type: Macro
83+
name:
84+
- id
85+
parameters:
86+
- "3"
87+
next_object:
88+
NestableDetachedModifier:
89+
modifier_type: UnorderedList
90+
level: 3
91+
extensions: []
92+
text:
93+
Paragraph:
94+
- Token:
95+
Text: three
96+
content: []
97+
- - CarryoverTag:
98+
tag_type: Macro
99+
name:
100+
- comment
101+
parameters: []
102+
next_object:
103+
Paragraph:
104+
- Token:
105+
Text: multi
106+
- Token:
107+
Special: "-"
108+
- Token:
109+
Text: line
110+
- Token: Whitespace
111+
- Token:
112+
Text: comments
113+
- DelimitingModifier: Weak
114+
- Paragraph:
115+
- Token:
116+
Text: out
117+
- - CarryoverTag:
118+
tag_type: Macro
119+
name:
120+
- id
121+
parameters:
122+
- "123"
123+
next_object:
124+
CarryoverTag:
125+
tag_type: Macro
126+
name:
127+
- comment
128+
parameters: []
129+
next_object:
130+
Paragraph:
131+
- Token:
132+
Text: comment
133+
- Token: Whitespace
134+
- Token:
135+
Text: with
136+
- Token: Whitespace
137+
- Token:
138+
Text: id

0 commit comments

Comments
 (0)