|
1 | 1 | use once_cell::sync::Lazy; |
2 | 2 | use regex::Regex; |
3 | 3 | static R_GEMINI_LINK: Lazy<Regex> = |
4 | | - Lazy::new(|| Regex::new(r"^=>\s*(?P<href>\S*)\s*(?P<label>.*)").unwrap()); |
5 | | - |
6 | | -#[derive(Debug)] |
7 | | -pub enum PageElement { |
8 | | - Heading(String), |
9 | | - Quote(String), |
10 | | - Preformatted(String), |
11 | | - Text(String), |
| 4 | + Lazy::new(|| Regex::new(r"^=>\s+(?P<href>\S+)(\s+(?P<label>.+))?").unwrap()); |
| 5 | + |
| 6 | +// See gemini://gemini.circumlunar.space/docs/cheatsheet.gmi |
| 7 | + |
/// Kind of element announced by an `Event::Start`.
///
/// `PartialEq`/`Eq` are derived so tags can be compared directly (e.g. in tests).
#[derive(Debug, Clone, PartialEq, Eq)]
pub enum Tag {
    /// A plain text line.
    Paragraph,
    /// A heading line; the payload is the heading level.
    Heading(u8),
    /// A run of lines starting with `>`.
    BlockQuote,
    /// Preformatted text delimited by lines starting with three backticks.
    CodeBlock,
    /// A run of `* ` list lines.
    UnorderedList,
    /// A single entry of an unordered list.
    Item,
    /// A link line: the target, followed by the optional label.
    Link(String, Option<String>),
}
| 18 | + |
| 19 | +#[derive(Debug, Clone)] |
| 20 | +pub enum Event<'a> { |
| 21 | + Start(Tag), |
| 22 | + End, |
| 23 | + Text(&'a str), |
| 24 | + BlankLine, |
15 | 25 | } |
16 | 26 |
|
17 | 27 | #[derive(Debug, Clone, Default)] |
18 | 28 | pub struct Parser { |
19 | | - inside_pre: bool, |
| 29 | + tag_stack: Vec<Tag>, |
20 | 30 | } |
21 | 31 |
|
22 | 32 | impl Parser { |
23 | 33 | pub fn new() -> Self { |
24 | | - Self { inside_pre: false } |
| 34 | + Self { tag_stack: vec![] } |
25 | 35 | } |
26 | | - pub fn parse_line(&mut self, line: &str) -> PageElement { |
| 36 | + |
| 37 | + /// Returns an `Event` when an event it's ready, else, `None` |
| 38 | + // TODO: Make this work on text input of any length, don't impose the "line" chunk requirement |
| 39 | + // some work has already been done, the pushed result is already structured to do so. |
| 40 | + pub fn parse_line<'a>(&mut self, line: &'a str, res: &mut Vec<Event<'a>>) { |
| 41 | + let parent_tag = self.tag_stack.last(); |
| 42 | + |
| 43 | + // Close pending multi-line tags |
| 44 | + if matches!(parent_tag, Some(Tag::BlockQuote)) && !line.starts_with('>') |
| 45 | + || matches!(parent_tag, Some(Tag::UnorderedList)) |
| 46 | + { |
| 47 | + res.push(Event::End); |
| 48 | + self.tag_stack.pop(); |
| 49 | + } |
| 50 | + |
| 51 | + let parent_tag = self.tag_stack.last(); |
| 52 | + |
27 | 53 | if line.starts_with("```") { |
28 | | - self.inside_pre = !self.inside_pre; |
29 | | - PageElement::Empty |
30 | | - } else if self.inside_pre { |
31 | | - PageElement::Preformatted(line.to_string()) |
| 54 | + let inner_res = if let Some(Tag::CodeBlock) = parent_tag { |
| 55 | + self.tag_stack.pop(); |
| 56 | + Event::End |
| 57 | + } else { |
| 58 | + self.tag_stack.push(Tag::CodeBlock); |
| 59 | + Event::Start(Tag::CodeBlock) |
| 60 | + }; |
| 61 | + res.push(inner_res); |
| 62 | + } else if let Some(Tag::CodeBlock) = parent_tag { |
| 63 | + res.push(Event::Text(line)); |
| 64 | + } else if line.trim().is_empty() { |
| 65 | + res.push(Event::BlankLine); |
32 | 66 | } else if line.starts_with('#') { |
33 | | - PageElement::Heading(line.to_string()) |
| 67 | + let line = line.trim_end(); |
| 68 | + let lvl = line.chars().filter(|c| *c == '#').count(); |
| 69 | + let heading = Tag::Heading(lvl as u8); |
| 70 | + res.push(Event::Start(heading)); |
| 71 | + |
| 72 | + let text = line.trim_start_matches('#').trim_start(); |
| 73 | + res.push(Event::Text(text)); |
| 74 | + res.push(Event::End); |
34 | 75 | } else if line.starts_with('>') { |
35 | | - PageElement::Quote(line.to_string()) |
| 76 | + if !matches!(parent_tag, Some(Tag::BlockQuote)) { |
| 77 | + res.push(Event::Start(Tag::BlockQuote)); |
| 78 | + } |
| 79 | + res.push(Event::Text(line.trim_start_matches('>'))); |
36 | 80 | } else if let Some(stripped) = line.strip_prefix("* ") { |
37 | | - PageElement::ListItem(stripped.to_string()) |
38 | | - } else if let Some(captures) = R_GEMINI_LINK.captures(line) { |
39 | | - match (captures.name("href"), captures.name("label")) { |
40 | | - (Some(m_href), Some(m_label)) if !m_label.as_str().is_empty() => PageElement::Link( |
41 | | - m_href.as_str().to_string(), |
42 | | - Some(m_label.as_str().to_string()), |
43 | | - ), |
44 | | - (Some(m_href), _) => PageElement::Link(m_href.as_str().to_string(), None), |
45 | | - _ => PageElement::Empty, |
| 81 | + if !matches!(parent_tag, Some(Tag::UnorderedList)) { |
| 82 | + res.push(Event::Start(Tag::UnorderedList)); |
46 | 83 | } |
| 84 | + res.push(Event::Start(Tag::Item)); |
| 85 | + res.push(Event::Text(stripped.trim_end())); |
| 86 | + res.push(Event::End); |
| 87 | + } else if let Some(captures) = R_GEMINI_LINK.captures(line.trim_end()) { |
| 88 | + let href = captures.name("href").unwrap(); |
| 89 | + let label = captures.name("label").map(|x| x.as_str()); |
| 90 | + res.push(Event::Start(Tag::Link( |
| 91 | + href.as_str().to_string(), |
| 92 | + label.map(|x| x.to_string()), |
| 93 | + ))); |
| 94 | + res.push(Event::End); |
47 | 95 | } else { |
48 | | - PageElement::Text(line.to_string()) |
| 96 | + res.push(Event::Start(Tag::Paragraph)); |
| 97 | + res.push(Event::Text(line.trim_end())); |
| 98 | + res.push(Event::End); |
49 | 99 | } |
50 | 100 | } |
51 | 101 | } |
0 commit comments