|
| 1 | +use crate::md_elem::flat_inlines::{FlattenedText, RangeReplacementError}; |
| 2 | +use crate::md_elem::tree::elem::Inline; |
| 3 | +use std::error::Error; |
| 4 | +use std::fmt::{Display, Formatter}; |
| 5 | + |
| 6 | +#[derive(Clone, Debug, PartialEq)] |
| 7 | +pub(crate) enum RegexReplaceError { |
| 8 | + InvalidRegex { pattern: String, error: String }, |
| 9 | + ReplacementError(RangeReplacementError), |
| 10 | +} |
| 11 | + |
| 12 | +impl Display for RegexReplaceError { |
| 13 | + fn fmt(&self, f: &mut Formatter<'_>) -> std::fmt::Result { |
| 14 | + match self { |
| 15 | + RegexReplaceError::InvalidRegex { pattern, error } => write!(f, "invalid regex {pattern:?}: {error}"), |
| 16 | + RegexReplaceError::ReplacementError(RangeReplacementError::InternalError(e)) => { |
| 17 | + write!(f, "internal error: {e}") |
| 18 | + } |
| 19 | + RegexReplaceError::ReplacementError(RangeReplacementError::AtomicityViolation) => { |
| 20 | + write!(f, "replacement crosses atomic boundary") |
| 21 | + } |
| 22 | + } |
| 23 | + } |
| 24 | +} |
| 25 | + |
| 26 | +impl Error for RegexReplaceError {} |
| 27 | + |
/// Outcome of a search-and-replace pass: the resulting value plus whether the pattern matched.
#[derive(Debug)]
pub(crate) struct Replaced<T> {
    /// The value produced by the pass.
    pub(crate) item: T,
    /// `true` if the pattern matched at least once.
    pub(crate) matched_any: bool,
}
| 33 | + |
| 34 | +/// Applies regex search and replace to a vector of inline elements. |
| 35 | +/// |
| 36 | +/// This flattens the inlines, applies the regex replacement, and reconstructs |
| 37 | +/// the tree structure. Returns an error if the regex would cross formatting |
| 38 | +/// boundaries that cannot be represented (like links or unsupported content). |
| 39 | +pub(crate) fn regex_replace_inlines( |
| 40 | + inlines: impl IntoIterator<Item = Inline>, |
| 41 | + pattern: &fancy_regex::Regex, |
| 42 | + replacement: Option<&str>, |
| 43 | +) -> Result<Replaced<Vec<Inline>>, RegexReplaceError> { |
| 44 | + // TODO should I have this take an owned Vec<Inline>? If I do, then if there are no matches I can just return the |
| 45 | + // original inlines, and thus save on the unflatten step. |
| 46 | + let mut flattened = FlattenedText::from_inlines(inlines); |
| 47 | + |
| 48 | + let mut replaced_string = String::new(); |
| 49 | + let flattened_text = flattened.text.to_string(); |
| 50 | + let matched_any = match replacement { |
| 51 | + None => pattern |
| 52 | + .is_match(&flattened_text) |
| 53 | + .map_err(|e| map_re_error(e, pattern))?, |
| 54 | + Some(replacement) => { |
| 55 | + let mut matched_any = false; |
| 56 | + for capture in pattern.captures_iter(&flattened_text) { |
| 57 | + matched_any = true; |
| 58 | + let capture = capture.map_err(|e| map_re_error(e, pattern))?; |
| 59 | + let capture_match = capture.get(0).expect("unwrap of capture's 0-group"); |
| 60 | + replaced_string.clear(); |
| 61 | + capture.expand(replacement, &mut replaced_string); |
| 62 | + let capture_range = capture_match.start()..capture_match.end(); |
| 63 | + flattened |
| 64 | + .replace_range(capture_range, &replaced_string) |
| 65 | + .map_err(RegexReplaceError::ReplacementError)?; |
| 66 | + } |
| 67 | + matched_any |
| 68 | + } |
| 69 | + }; |
| 70 | + |
| 71 | + let unflattened = flattened.unflatten().map_err(RegexReplaceError::ReplacementError)?; |
| 72 | + Ok(Replaced { |
| 73 | + matched_any, |
| 74 | + item: unflattened, |
| 75 | + }) |
| 76 | +} |
| 77 | + |
| 78 | +fn map_re_error(e: fancy_regex::Error, pattern: &fancy_regex::Regex) -> RegexReplaceError { |
| 79 | + RegexReplaceError::InvalidRegex { |
| 80 | + pattern: pattern.as_str().to_string(), |
| 81 | + error: format!("{e}"), |
| 82 | + } |
| 83 | +} |
| 84 | + |
#[cfg(test)]
mod tests {
    use super::*;

    use crate::md_elem::tree_test_utils::inlines;

    #[test]
    fn simple_replacement() {
        let inlines = inlines!["hello world"];
        let pattern = fancy_regex::Regex::new(r"world").unwrap();
        let result = regex_replace_inlines(inlines, &pattern, Some("rust")).unwrap();

        assert_eq!(result.item, inlines!["hello rust"]);
        assert!(result.matched_any);
    }

    #[test]
    fn simple_replacement_to_same() {
        let inlines = inlines!["hello world"];
        let pattern = fancy_regex::Regex::new(r"world").unwrap();
        let result = regex_replace_inlines(inlines, &pattern, Some("world")).unwrap();

        assert_eq!(result.item, inlines!["hello world"]); // same as original
        assert!(result.matched_any);
    }

    #[test]
    fn no_match_returns_original() {
        let inlines = inlines!["hello world"];
        let pattern = fancy_regex::Regex::new(r"foo").unwrap();
        let result = regex_replace_inlines(inlines.clone(), &pattern, Some("bar")).unwrap();

        assert_eq!(result.item, inlines);
        assert!(!result.matched_any);
    }

    #[test]
    fn replacement_with_formatting() {
        let inlines = inlines!["before ", em["emphasized"], " after"];
        let pattern = fancy_regex::Regex::new(r"emphasized").unwrap();
        let result = regex_replace_inlines(inlines, &pattern, Some("replaced")).unwrap();

        let expected = inlines!["before ", em["replaced"], " after"];
        assert_eq!(result.item, expected);
    }

    #[test]
    fn partial_replacement_with_formatting() {
        let inlines = inlines!["before ", em["emphasized and"], " after"];
        let pattern = fancy_regex::Regex::new(r"emphasized").unwrap();
        let result = regex_replace_inlines(inlines, &pattern, Some("replaced")).unwrap();

        let expected = inlines!["before ", em["replaced and"], " after"];
        assert_eq!(result.item, expected);
    }

    #[test]
    fn replacement_across_formatting() {
        let inlines = inlines!["before ", em["emphasized"], " after"];

        let pattern = fancy_regex::Regex::new(r"ore emphasized af").unwrap();
        let result = regex_replace_inlines(inlines, &pattern, Some("oo")).unwrap();

        // When replacement spans formatting boundaries, formatting should be removed
        let expected = inlines!["befooter"];
        assert_eq!(result.item, expected);
    }

    #[test]
    fn capture_groups() {
        let inlines = inlines!["hello world"];
        let pattern = fancy_regex::Regex::new(r"(\w+) (\w+)").unwrap();
        let result = regex_replace_inlines(inlines, &pattern, Some("$2 $1")).unwrap();

        assert_eq!(result.item, inlines!["world hello"]);
    }

    #[test]
    fn multiple_matches() {
        let inlines = inlines!["foo bar foo baz"];
        let pattern = fancy_regex::Regex::new(r"foo").unwrap();
        let result = regex_replace_inlines(inlines, &pattern, Some("qux")).unwrap();

        assert_eq!(result.item, inlines!["qux bar qux baz"]);
        assert!(result.matched_any);
    }

    #[test]
    fn unsupported_content_error() {
        // NOTE(review): the two `link[...](...)` invocations in this test were reconstructed
        // after the original text was lost -- confirm against the `inlines!` macro.
        let inlines = inlines!["before ", link["link text"]("https://example.com"), " after"];

        // This should succeed because the regex doesn't cross the link boundary
        let pattern = fancy_regex::Regex::new(r"before").unwrap();
        let result = regex_replace_inlines(inlines.clone(), &pattern, Some("pre")).unwrap();
        assert_eq!(
            result.item,
            inlines!["pre ", link["link text"]("https://example.com"), " after",],
        );

        // This should fail because the regex crosses into the link
        let pattern = fancy_regex::Regex::new(r"ore link").unwrap();
        let result = regex_replace_inlines(inlines, &pattern, Some("replacement"));
        assert!(result.is_err());
    }
}
0 commit comments