Skip to content

Commit 989c417

Browse files
authored
add utility for replacing strings within Inlines (#402)
The core of this change is flat_inlines.rs, which lets you: 1. Translate a Vec<Inline> (which is a recursive data structure) into a flat representation. This representation is (a) a plain String, and (b) a series of formatting events, specified by offset and length. For example, "bold starting at char 5 for 10 chars". 2. Replace a slice of this flattened structure with another string. 3. Unflatten the structure back into Vec<Inline>. This will be used in #376.
1 parent 838263b commit 989c417

File tree

8 files changed

+1909
-5
lines changed

8 files changed

+1909
-5
lines changed

Cargo.toml

Lines changed: 3 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -30,3 +30,6 @@ lazy_static = "1.4.0"
3030
indoc = "2"
3131
serde = { version = "1", features = ["derive"] }
3232
toml = "0.8"
33+
34+
[lints.rust]
35+
dead_code = "allow" # TODO remove this; it's a temporary crutch for the phased commits of #376

src/md_elem/flat_inlines.rs

Lines changed: 1597 additions & 0 deletions
Large diffs are not rendered by default.
Lines changed: 189 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,189 @@
1+
use crate::md_elem::flat_inlines::{FlattenedText, RangeReplacementError};
2+
use crate::md_elem::tree::elem::Inline;
3+
use std::error::Error;
4+
use std::fmt::{Display, Formatter};
5+
6+
#[derive(Clone, Debug, PartialEq)]
7+
pub(crate) enum RegexReplaceError {
8+
InvalidRegex { pattern: String, error: String },
9+
ReplacementError(RangeReplacementError),
10+
}
11+
12+
impl Display for RegexReplaceError {
13+
fn fmt(&self, f: &mut Formatter<'_>) -> std::fmt::Result {
14+
match self {
15+
RegexReplaceError::InvalidRegex { pattern, error } => write!(f, "invalid regex {pattern:?}: {error}"),
16+
RegexReplaceError::ReplacementError(RangeReplacementError::InternalError(e)) => {
17+
write!(f, "internal error: {e}")
18+
}
19+
RegexReplaceError::ReplacementError(RangeReplacementError::AtomicityViolation) => {
20+
write!(f, "replacement crosses atomic boundary")
21+
}
22+
}
23+
}
24+
}
25+
26+
// No methods are overridden here, so the `Error` trait's default implementations are used.
impl Error for RegexReplaceError {}
27+
28+
/// The outcome of a replace operation: the resulting value, plus whether the pattern matched.
#[derive(Debug)]
pub(crate) struct Replaced<T> {
    /// The value produced by the operation.
    pub(crate) item: T,
    /// `true` if the pattern matched at least once.
    pub(crate) matched_any: bool,
}
33+
34+
/// Applies regex search and replace to a vector of inline elements.
35+
///
36+
/// This flattens the inlines, applies the regex replacement, and reconstructs
37+
/// the tree structure. Returns an error if the regex would cross formatting
38+
/// boundaries that cannot be represented (like links or unsupported content).
39+
pub(crate) fn regex_replace_inlines(
40+
inlines: impl IntoIterator<Item = Inline>,
41+
pattern: &fancy_regex::Regex,
42+
replacement: Option<&str>,
43+
) -> Result<Replaced<Vec<Inline>>, RegexReplaceError> {
44+
// TODO should I have this take an owned Vec<Inline>? If I do, then if there are no matches I can just return the
45+
// original inlines, and thus save on the unflatten step.
46+
let mut flattened = FlattenedText::from_inlines(inlines);
47+
48+
let mut replaced_string = String::new();
49+
let flattened_text = flattened.text.to_string();
50+
let matched_any = match replacement {
51+
None => pattern
52+
.is_match(&flattened_text)
53+
.map_err(|e| map_re_error(e, pattern))?,
54+
Some(replacement) => {
55+
let mut matched_any = false;
56+
for capture in pattern.captures_iter(&flattened_text) {
57+
matched_any = true;
58+
let capture = capture.map_err(|e| map_re_error(e, pattern))?;
59+
let capture_match = capture.get(0).expect("unwrap of capture's 0-group");
60+
replaced_string.clear();
61+
capture.expand(replacement, &mut replaced_string);
62+
let capture_range = capture_match.start()..capture_match.end();
63+
flattened
64+
.replace_range(capture_range, &replaced_string)
65+
.map_err(RegexReplaceError::ReplacementError)?;
66+
}
67+
matched_any
68+
}
69+
};
70+
71+
let unflattened = flattened.unflatten().map_err(RegexReplaceError::ReplacementError)?;
72+
Ok(Replaced {
73+
matched_any,
74+
item: unflattened,
75+
})
76+
}
77+
78+
fn map_re_error(e: fancy_regex::Error, pattern: &fancy_regex::Regex) -> RegexReplaceError {
79+
RegexReplaceError::InvalidRegex {
80+
pattern: pattern.as_str().to_string(),
81+
error: format!("{e}"),
82+
}
83+
}
84+
85+
#[cfg(test)]
mod tests {
    use super::*;

    use crate::md_elem::tree_test_utils::inlines;

    /// Compiles a pattern that is known to be valid.
    fn re(pattern: &str) -> fancy_regex::Regex {
        fancy_regex::Regex::new(pattern).unwrap()
    }

    #[test]
    fn simple_replacement() {
        let input = inlines!["hello world"];
        let replaced = regex_replace_inlines(input, &re(r"world"), Some("rust")).unwrap();

        assert_eq!(replaced.item, inlines!["hello rust"]);
        assert!(replaced.matched_any);
    }

    #[test]
    fn simple_replacement_to_same() {
        let input = inlines!["hello world"];
        let replaced = regex_replace_inlines(input, &re(r"world"), Some("world")).unwrap();

        // The output equals the input, but the pattern still counts as having matched.
        assert_eq!(replaced.item, inlines!["hello world"]);
        assert!(replaced.matched_any);
    }

    #[test]
    fn no_match_returns_original() {
        let input = inlines!["hello world"];
        let replaced = regex_replace_inlines(input.clone(), &re(r"foo"), Some("bar")).unwrap();

        assert_eq!(replaced.item, input);
        assert!(!replaced.matched_any);
    }

    #[test]
    fn replacement_with_formatting() {
        let input = inlines!["before ", em["emphasized"], " after"];
        let replaced = regex_replace_inlines(input, &re(r"emphasized"), Some("replaced")).unwrap();

        assert_eq!(replaced.item, inlines!["before ", em["replaced"], " after"]);
    }

    #[test]
    fn partial_replacement_with_formatting() {
        let input = inlines!["before ", em["emphasized and"], " after"];
        let replaced = regex_replace_inlines(input, &re(r"emphasized"), Some("replaced")).unwrap();

        assert_eq!(replaced.item, inlines!["before ", em["replaced and"], " after"]);
    }

    #[test]
    fn replacement_across_formatting() {
        let input = inlines!["before ", em["emphasized"], " after"];
        let replaced = regex_replace_inlines(input, &re(r"ore emphasized af"), Some("oo")).unwrap();

        // When replacement spans formatting boundaries, formatting should be removed
        assert_eq!(replaced.item, inlines!["befooter"]);
    }

    #[test]
    fn capture_groups() {
        let input = inlines!["hello world"];
        let replaced = regex_replace_inlines(input, &re(r"(\w+) (\w+)"), Some("$2 $1")).unwrap();

        assert_eq!(replaced.item, inlines!["world hello"]);
    }

    #[test]
    fn multiple_matches() {
        let input = inlines!["foo bar foo baz"];
        let replaced = regex_replace_inlines(input, &re(r"foo"), Some("qux")).unwrap();

        assert_eq!(replaced.item, inlines!["qux bar qux baz"]);
        assert!(replaced.matched_any);
    }

    #[test]
    fn unsupported_content_error() {
        let input = inlines!["before ", link["link text"]("https://example.com"), " after"];

        // This should succeed because the regex doesn't cross the link boundary
        let outside_link = regex_replace_inlines(input.clone(), &re(r"before"), Some("pre")).unwrap();
        assert_eq!(
            outside_link.item,
            inlines!["pre ", link["link text"]("https://example.com"), " after",],
        );

        // This should fail because the regex crosses into the link
        let into_link = regex_replace_inlines(input, &re(r"ore link"), Some("replacement"));
        assert!(into_link.is_err());
    }
}

src/md_elem/mod.rs

Lines changed: 2 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -7,6 +7,8 @@ mod tree_ref;
77
pub use tree::*;
88

99
mod concatenate;
10+
mod flat_inlines;
11+
pub(crate) mod inline_regex_replace;
1012
#[cfg(test)]
1113
pub(crate) mod tree_test_utils;
1214

src/md_elem/tree_test_utils.rs

Lines changed: 113 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -24,6 +24,7 @@ mod test_utils {
2424
pub(crate) use md_elems;
2525

2626
macro_rules! mdq_inline {
27+
// todo replace this with the `inlines!` macro below
2728
(span $which:ident [$($contents:expr),*$(,)?]) => {
2829
crate::md_elem::elem::Inline::Span(Span {
2930
variant: crate::md_elem::elem::SpanVariant::$which,
@@ -40,6 +41,118 @@ mod test_utils {
4041
use crate::md_elem::elem::BlockHtml;
4142
pub(crate) use mdq_inline;
4243

44+
// Test helper that builds a `Vec<Inline>` from a compact, comma-separated description, e.g.
// `inlines!["before ", em["emphasized"], " after"]`.
//
// Each arm consumes the first item, builds a one-element vec for it, and then recurses on
// whatever follows the comma (if anything), extending the vec with the result. Because the
// tail may be empty, a trailing comma is accepted.
macro_rules! inlines {
    // Nothing left: an empty vec. This is also the base case of the recursion.
    [] => {
        Vec::<crate::md_elem::elem::Inline>::new()
    };

    // Bare string literal: shorthand for `text[...]`.
    [$text:literal $(, $($rest:tt)*)?] => {
        crate::md_elem::inlines!(text[$text] $(, $($rest)*)?)
    };

    // `text[expr]`: plain text.
    [text[$text:expr] $(, $($rest:tt)*)?] => {
        {
            #[allow(unused_mut)]
            let mut result = vec![
                crate::md_elem::elem::Inline::Text(crate::md_elem::elem::Text {
                    variant: crate::md_elem::elem::TextVariant::Plain,
                    value: $text.to_string(),
                })
            ];
            $(result.extend(inlines![$($rest)*]);)?
            result
        }
    };

    // `em[...]`: an emphasis span whose children are themselves `inlines!` syntax.
    [em[$($content:tt)*] $(, $($rest:tt)*)?] => {
        {
            #[allow(unused_mut)]
            let mut result = vec![
                crate::md_elem::elem::Inline::Span(crate::md_elem::elem::Span {
                    variant: crate::md_elem::elem::SpanVariant::Emphasis,
                    children: inlines![$($content)*],
                })
            ];
            $(result.extend(inlines![$($rest)*]);)?
            result
        }
    };

    // `strong[...]`: a strong span whose children are themselves `inlines!` syntax.
    [strong[$($content:tt)*] $(, $($rest:tt)*)?] => {
        {
            #[allow(unused_mut)]
            let mut result = vec![
                crate::md_elem::elem::Inline::Span(crate::md_elem::elem::Span {
                    variant: crate::md_elem::elem::SpanVariant::Strong,
                    children: inlines![$($content)*],
                })
            ];
            $(result.extend(inlines![$($rest)*]);)?
            result
        }
    };

    // `link[...]("url")`: an inline-style standard link with no title; the display text is
    // itself `inlines!` syntax.
    [link[$($display:tt)*] ($url:literal) $(, $($rest:tt)*)?] => {
        {
            #[allow(unused_mut)]
            let mut result = vec![
                crate::md_elem::elem::Inline::Link(crate::md_elem::elem::Link::Standard(
                    crate::md_elem::elem::StandardLink {
                        display: inlines![$($display)*],
                        link: crate::md_elem::elem::LinkDefinition {
                            url: $url.to_string(),
                            title: None,
                            reference: crate::md_elem::elem::LinkReference::Inline,
                        },
                    }
                ))
            ];
            $(result.extend(inlines![$($rest)*]);)?
            result
        }
    };

    // `image[alt](url)`: an inline-style image with no title.
    [image[$alt:expr] ($url:expr) $(, $($rest:tt)*)?] => {
        {
            #[allow(unused_mut)]
            let mut result = vec![
                crate::md_elem::elem::Inline::Image(crate::md_elem::elem::Image {
                    alt: $alt.to_string(),
                    link: crate::md_elem::elem::LinkDefinition {
                        url: $url.to_string(),
                        title: None,
                        reference: crate::md_elem::elem::LinkReference::Inline,
                    }
                })
            ];
            $(result.extend(inlines![$($rest)*]);)?
            result
        }
    };

    // Footnote, like `footnote["^1"]`
    [footnote[$val:expr] $(, $($rest:tt)*)?] => {
        {
            #[allow(unused_mut)]
            let mut result = vec![
                crate::md_elem::elem::Inline::Footnote(crate::md_elem::elem::FootnoteId {
                    id: $val.to_string(),
                })
            ];
            $(result.extend(inlines![$($rest)*]);)?
            result
        }
    };
}
154+
pub(crate) use inlines;
155+
43156
impl From<&str> for BlockHtml {
44157
fn from(value: &str) -> Self {
45158
Self {

src/query/pest.rs

Lines changed: 2 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -51,7 +51,7 @@ impl From<pest::error::Error<Rule>> for Error {
5151
}
5252

5353
impl Query {
54-
pub(crate) fn parse(query_text: &str) -> Result<Pairs, Error> {
54+
pub(crate) fn parse(query_text: &str) -> Result<Pairs<'_>, Error> {
5555
QueryPairs::parse(Rule::top, query_text).map_err(Self::format_err)
5656
}
5757

@@ -122,7 +122,7 @@ mod test_helpers {
122122
impl StringVariant {
123123
/// Tries to parse the given string. If it succeeds, returns the parsed Pairs and the remaining, unparsed query
124124
/// text.
125-
pub(crate) fn parse(self, query_text: &str) -> Result<(Pairs, &str), Error> {
125+
pub(crate) fn parse(self, query_text: &str) -> Result<(Pairs<'_>, &str), Error> {
126126
let parsed = QueryPairs::parse(self.as_rule(), query_text)?;
127127
let remaining = match parsed.peek() {
128128
None => query_text,

src/select/string_matcher.rs

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -75,7 +75,7 @@ impl StringMatcher {
7575
self.replacement.is_some()
7676
}
7777

78-
pub(crate) fn match_replace(&self, haystack: String) -> Result<StringMatch, StringMatchError> {
78+
pub(crate) fn match_replace(&self, haystack: String) -> Result<StringMatch<'_>, StringMatchError> {
7979
match self.re.is_match(&haystack) {
8080
Ok(is_match) => Ok(if is_match {
8181
let replacement = self.replacement.as_ref().map(|r| (&self.re, r.as_str()));

src/util/output.rs

Lines changed: 2 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -325,7 +325,7 @@ impl StaticIndentInfo {
325325
}
326326
}
327327

328-
fn build(self, blocks: &[Block]) -> IndentInfo {
328+
fn build(self, blocks: &[Block]) -> IndentInfo<'_> {
329329
IndentInfo {
330330
blocks,
331331
static_info: self,
@@ -348,7 +348,7 @@ impl IndentHandler {
348348
}
349349
}
350350

351-
fn get_indentation_info(&mut self, ch: Option<char>, state: WritingState) -> IndentInfo {
351+
fn get_indentation_info(&mut self, ch: Option<char>, state: WritingState) -> IndentInfo<'_> {
352352
// #199: I have a number of branches here. Can I nest some of them, so that in the happy path of a non-newline
353353
// char, I just have a single check?
354354
let mut indent_builder = StaticIndentInfo::new(state);

0 commit comments

Comments
 (0)