Skip to content

Commit 54cded0

Browse files
committed
chore: document to plain text refactor
1 parent 433ed82 commit 54cded0

File tree

11 files changed

+564
-10
lines changed

11 files changed

+564
-10
lines changed

collab/src/document/block_parser/document_parser.rs

Lines changed: 21 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -2,8 +2,9 @@ use super::{
22
BlockParserRegistry, BulletedListParser, CalloutParser, CodeBlockParser, DividerParser,
33
DocumentParserDelegate, FileBlockParser, HeadingParser, ImageParser, LinkPreviewParser,
44
MathEquationParser, NumberedListParser, OutputFormat, PageParser, ParagraphParser, ParseContext,
5-
QuoteListParser, SimpleColumnParser, SimpleColumnsParser, SimpleTableCellParser,
6-
SimpleTableParser, SimpleTableRowParser, SubpageParser, TodoListParser, ToggleListParser,
5+
PlainTextResolver, QuoteListParser, SimpleColumnParser, SimpleColumnsParser,
6+
SimpleTableCellParser, SimpleTableParser, SimpleTableRowParser, SubpageParser, TodoListParser,
7+
ToggleListParser,
78
};
89
use crate::document::blocks::{Block, DocumentData};
910
use crate::error::CollabError;
@@ -15,13 +16,15 @@ pub struct DocumentParser {
1516

1617
/// Provide the delegate to handle special cases like mentions during parsing
1718
delegate: Option<Arc<dyn DocumentParserDelegate + Send + Sync>>,
19+
plain_text_resolver: Option<Arc<dyn PlainTextResolver + Send + Sync>>,
1820
}
1921

2022
impl DocumentParser {
2123
pub fn new() -> Self {
2224
Self {
2325
registry: BlockParserRegistry::new(),
2426
delegate: None,
27+
plain_text_resolver: None,
2528
}
2629
}
2730

@@ -38,6 +41,22 @@ impl DocumentParser {
3841
self.delegate.as_ref()
3942
}
4043

44+
pub fn with_plain_text_resolver(
45+
mut self,
46+
resolver: Arc<dyn PlainTextResolver + Send + Sync>,
47+
) -> Self {
48+
self.plain_text_resolver = Some(resolver.clone());
49+
self
50+
}
51+
52+
/// Installs `resolver` as the plain-text resolver, replacing any resolver that
/// was configured before.
pub fn set_plain_text_resolver(&mut self, resolver: Arc<dyn PlainTextResolver + Send + Sync>) {
  let installed = Some(resolver);
  self.plain_text_resolver = installed;
}
55+
56+
pub fn get_plain_text_resolver(&self) -> Option<&Arc<dyn PlainTextResolver + Send + Sync>> {
57+
self.plain_text_resolver.as_ref()
58+
}
59+
4160
pub fn with_default_parsers() -> Self {
4261
let mut parser = Self::new();
4362

Lines changed: 2 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -1,11 +1,13 @@
11
pub mod document_parser;
22
pub mod parsers;
3+
pub mod plain_text_resolver;
34
pub mod registry;
45
pub mod text_utils;
56
pub mod traits;
67

78
pub use document_parser::*;
89
pub use parsers::*;
10+
pub use plain_text_resolver::*;
911
pub use registry::*;
1012
pub use text_utils::*;
1113
pub use traits::*;

collab/src/document/block_parser/parsers/file_block.rs

Lines changed: 12 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -45,6 +45,18 @@ impl BlockParser for FileBlockParser {
4545
}
4646
},
4747
OutputFormat::PlainText => {
48+
if let Some(resolver) = context.plain_text_resolver() {
49+
if let Some(content) = resolver.resolve_block_text(block, context) {
50+
let indent = context.get_indent();
51+
let resolved = if content.is_empty() {
52+
String::new()
53+
} else {
54+
format!("{}{}", indent, content)
55+
};
56+
return Ok(ParseResult::new(resolved));
57+
}
58+
}
59+
4860
let indent = context.get_indent();
4961
if url.is_empty() {
5062
format!("{}{}", indent, name)

collab/src/document/block_parser/parsers/image.rs

Lines changed: 11 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -28,7 +28,17 @@ impl BlockParser for ImageParser {
2828
format!("![Image]({})", url)
2929
}
3030
},
31-
crate::document::OutputFormat::PlainText => url.to_string(),
31+
crate::document::OutputFormat::PlainText => {
32+
if let Some(resolver) = context.plain_text_resolver() {
33+
if let Some(content) = resolver.resolve_block_text(block, context) {
34+
content
35+
} else {
36+
url.to_string()
37+
}
38+
} else {
39+
url.to_string()
40+
}
41+
},
3242
};
3343

3444
let children_content = self.parse_children(block, context);

collab/src/document/block_parser/parsers/link_preview.rs

Lines changed: 11 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -34,6 +34,17 @@ impl BlockParser for LinkPreviewParser {
3434
}
3535
},
3636
OutputFormat::PlainText => {
37+
if let Some(resolver) = context.plain_text_resolver() {
38+
if let Some(content) = resolver.resolve_block_text(block, context) {
39+
let indent = context.get_indent();
40+
let resolved = if content.is_empty() {
41+
String::new()
42+
} else {
43+
format!("{}{}", indent, content)
44+
};
45+
return Ok(ParseResult::new(resolved));
46+
}
47+
}
3748
let indent = context.get_indent();
3849
if url.is_empty() {
3950
"".to_string()

collab/src/document/block_parser/parsers/subpage.rs

Lines changed: 12 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -34,6 +34,18 @@ impl BlockParser for SubpageParser {
3434
}
3535
},
3636
OutputFormat::PlainText => {
37+
if let Some(resolver) = context.plain_text_resolver() {
38+
if let Some(content) = resolver.resolve_block_text(block, context) {
39+
let indent = context.get_indent();
40+
let resolved = if content.is_empty() {
41+
String::new()
42+
} else {
43+
format!("{}{}", indent, content)
44+
};
45+
return Ok(ParseResult::new(resolved));
46+
}
47+
}
48+
3749
let indent = context.get_indent();
3850
if view_id.is_empty() {
3951
"".to_string()
Lines changed: 254 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,254 @@
1+
use chrono::{DateTime, NaiveDate, NaiveDateTime, Utc};
2+
use serde_json::{Map as JsonMap, Value as JsonValue};
3+
use std::collections::HashMap;
4+
use std::fmt::Debug;
5+
6+
use super::traits::{DocumentParserDelegate, ParseContext};
7+
use crate::document::blocks::{Block, BlockType};
8+
9+
use crate::preclude::{Any, Attrs};
10+
11+
const MENTION_KEY: &str = "mention";
12+
const MENTION_TYPE_KEY: &str = "type";
13+
const PERSON_TYPE: &str = "person";
14+
const PAGE_TYPE: &str = "page";
15+
const CHILD_PAGE_TYPE: &str = "childPage";
16+
const DATE_TYPE: &str = "date";
17+
const REMINDER_TYPE: &str = "reminder";
18+
const EXTERNAL_LINK_TYPE: &str = "externalLink";
19+
20+
/// Trait used to customise how plain text export handles mention attributes and embed blocks.
21+
///
22+
/// The trait extends [`DocumentParserDelegate`] so it can hook into the existing delta visitor
23+
/// infrastructure, while also offering a block-level resolution hook via
24+
/// [`PlainTextResolver::resolve_block_text`].
25+
pub trait PlainTextResolver: DocumentParserDelegate + Debug + Send + Sync {
26+
/// Resolve a block (such as sub page, file, image) into a plain-text representation.
27+
///
28+
/// Returning `None` lets the default parser formatting kick in.
29+
fn resolve_block_text(&self, _block: &Block, _context: &ParseContext) -> Option<String> {
30+
None
31+
}
32+
}
33+
34+
/// Stock resolver that works from the data already stored in the document.
///
/// Two optional override tables let callers choose the label rendered for a
/// person id or a document id; ids without an entry fall back to whatever the
/// document itself provides.
#[derive(Clone, Debug, Default)]
pub struct DefaultPlainTextResolver {
  // Display names keyed by person id.
  person_names: HashMap<String, String>,
  // Titles keyed by document (page / view) id.
  document_titles: HashMap<String, String>,
}
42+
43+
impl DefaultPlainTextResolver {
44+
pub fn new() -> Self {
45+
Self::default()
46+
}
47+
48+
pub fn with_person_names(mut self, map: HashMap<String, String>) -> Self {
49+
self.person_names = map;
50+
self
51+
}
52+
53+
pub fn with_document_titles(mut self, map: HashMap<String, String>) -> Self {
54+
self.document_titles = map;
55+
self
56+
}
57+
58+
pub fn set_person_names(&mut self, map: HashMap<String, String>) {
59+
self.person_names = map;
60+
}
61+
62+
pub fn set_document_titles(&mut self, map: HashMap<String, String>) {
63+
self.document_titles = map;
64+
}
65+
66+
fn title_for_document(&self, page_id: &str) -> String {
67+
self
68+
.document_titles
69+
.get(page_id)
70+
.cloned()
71+
.unwrap_or_else(|| page_id.to_string())
72+
}
73+
74+
fn label_for_person(&self, person_id: Option<&str>, person_name: Option<&str>) -> Option<String> {
75+
let id = person_id?;
76+
let base_name = self
77+
.person_names
78+
.get(id)
79+
.cloned()
80+
.or_else(|| person_name.map(ToString::to_string))
81+
.unwrap_or_else(|| id.to_string());
82+
Some(format!("@{}", base_name))
83+
}
84+
85+
fn label_for_document(&self, page_id: Option<&str>) -> Option<String> {
86+
let page_id = page_id?;
87+
Some(format!("[[{}]]", self.title_for_document(page_id)))
88+
}
89+
90+
fn label_for_date(&self, date: Option<&str>, include_time: bool) -> Option<String> {
91+
let date_value = date?;
92+
let formatted =
93+
parse_date_string(date_value, include_time).unwrap_or_else(|| date_value.to_string());
94+
Some(format!("@{}", formatted))
95+
}
96+
97+
fn label_for_external_link(&self, url: Option<&str>) -> Option<String> {
98+
let url = url?;
99+
if url.is_empty() {
100+
None
101+
} else {
102+
Some(url.to_string())
103+
}
104+
}
105+
}
106+
107+
impl DocumentParserDelegate for DefaultPlainTextResolver {
108+
fn handle_text_delta(
109+
&self,
110+
text: &str,
111+
attributes: Option<&Attrs>,
112+
_context: &ParseContext,
113+
) -> Option<String> {
114+
if text != "$" {
115+
return None;
116+
}
117+
118+
let mention = MentionInfo::from_attrs(attributes)?;
119+
match mention.kind.as_str() {
120+
PERSON_TYPE => self.label_for_person(
121+
mention.string("person_id").as_deref(),
122+
mention.string("person_name").as_deref(),
123+
),
124+
PAGE_TYPE | CHILD_PAGE_TYPE => self.label_for_document(mention.string("page_id").as_deref()),
125+
DATE_TYPE | REMINDER_TYPE => self.label_for_date(
126+
mention.string("date").as_deref(),
127+
mention.bool("include_time").unwrap_or(false),
128+
),
129+
EXTERNAL_LINK_TYPE => self.label_for_external_link(mention.string("url").as_deref()),
130+
_ => None,
131+
}
132+
}
133+
}
134+
135+
impl PlainTextResolver for DefaultPlainTextResolver {
136+
fn resolve_block_text(&self, block: &Block, _context: &ParseContext) -> Option<String> {
137+
match BlockType::from_block_ty(block.ty.as_str()) {
138+
BlockType::SubPage => {
139+
let view_id = block
140+
.data
141+
.get("view_id")
142+
.or_else(|| block.data.get("viewId"));
143+
view_id
144+
.and_then(|value| value.as_str())
145+
.map(|id| self.title_for_document(id))
146+
},
147+
BlockType::LinkPreview => block
148+
.data
149+
.get("url")
150+
.and_then(|value| value.as_str())
151+
.filter(|url| !url.is_empty())
152+
.map(|url| url.to_string()),
153+
BlockType::File => {
154+
let name = block.data.get("name").and_then(|value| value.as_str());
155+
let url = block.data.get("url").and_then(|value| value.as_str());
156+
name
157+
.map(|n| {
158+
if let Some(u) = url {
159+
if !u.is_empty() {
160+
format!("{} ({})", n, u)
161+
} else {
162+
n.to_string()
163+
}
164+
} else {
165+
n.to_string()
166+
}
167+
})
168+
.or_else(|| url.map(|u| u.to_string()))
169+
},
170+
BlockType::Image => block
171+
.data
172+
.get("url")
173+
.and_then(|value| value.as_str())
174+
.filter(|url| !url.is_empty())
175+
.map(|url| url.to_string()),
176+
_ => None,
177+
}
178+
}
179+
}
180+
181+
#[derive(Debug, Clone)]
182+
struct MentionInfo {
183+
kind: String,
184+
data: JsonMap<String, JsonValue>,
185+
}
186+
187+
impl MentionInfo {
188+
fn from_attrs(attrs: Option<&Attrs>) -> Option<Self> {
189+
let attrs = attrs?;
190+
let mention_any = attrs.get(MENTION_KEY)?;
191+
192+
let mention_map = mention_any_to_map(mention_any)?;
193+
let kind = mention_map
194+
.get(MENTION_TYPE_KEY)
195+
.and_then(|value| value.as_str())
196+
.unwrap_or_default()
197+
.to_string();
198+
199+
Some(Self {
200+
kind,
201+
data: mention_map,
202+
})
203+
}
204+
205+
fn string(&self, key: &str) -> Option<String> {
206+
self
207+
.data
208+
.get(key)
209+
.and_then(|value| value.as_str())
210+
.map(|value| value.to_string())
211+
}
212+
213+
fn bool(&self, key: &str) -> Option<bool> {
214+
self.data.get(key).and_then(|value| match value {
215+
JsonValue::Bool(flag) => Some(*flag),
216+
JsonValue::String(text) => text.parse::<bool>().ok(),
217+
_ => None,
218+
})
219+
}
220+
}
221+
222+
/// Converts a mention attribute value into a JSON object.
///
/// The value is serialised to JSON text first. If that text is a JSON object
/// it is returned directly; if it is a JSON string, the string's contents are
/// parsed as a JSON object instead. Any serialisation or parse failure yields
/// `None`.
fn mention_any_to_map(mention_any: &Any) -> Option<JsonMap<String, JsonValue>> {
  let encoded = serde_json::to_string(mention_any).ok()?;

  serde_json::from_str::<JsonMap<String, JsonValue>>(&encoded)
    .ok()
    .or_else(|| {
      // Not an object: try the "object encoded inside a string" form.
      let nested = serde_json::from_str::<String>(&encoded).ok()?;
      serde_json::from_str::<JsonMap<String, JsonValue>>(&nested).ok()
    })
}
232+
233+
fn parse_date_string(input: &str, include_time: bool) -> Option<String> {
234+
if let Ok(datetime) = DateTime::parse_from_rfc3339(input) {
235+
return Some(if include_time {
236+
datetime
237+
.with_timezone(&Utc)
238+
.format("%Y-%m-%d %H:%M")
239+
.to_string()
240+
} else {
241+
datetime.date_naive().format("%Y-%m-%d").to_string()
242+
});
243+
}
244+
245+
if include_time {
246+
if let Ok(datetime) = NaiveDateTime::parse_from_str(input, "%Y-%m-%d %H:%M:%S") {
247+
return Some(datetime.format("%Y-%m-%d %H:%M").to_string());
248+
}
249+
} else if let Ok(date) = NaiveDate::parse_from_str(input, "%Y-%m-%d") {
250+
return Some(date.format("%Y-%m-%d").to_string());
251+
}
252+
253+
None
254+
}

0 commit comments

Comments
 (0)