Skip to content

Commit 61a61bc

Browse files
authored
implement regex replace for URLs
This PR adds search-and-replace functionality, allowing users to modify link and image URLs using regex patterns. Users can now use `!s/pattern/replacement/` syntax to find and replace URL content.

For now, only URLs support search-replace. General spans will come later (see #376).

## High-level changes

- New `!s/pattern/replacement/` syntax for regex search-replace operations, extending the existing `/pattern/` regex syntax
- Replace `Matcher` with a new `MatchReplace` in the `Selector` variants' structs. This combines the original `Matcher` with an optional replacement string
- Implement URL replacement for links and images
- Refactor selector error handling to use proper `Result<Select, SelectError>` semantics instead of misusing `Err(MdElem)` for "no match" cases. The new `Select` enum is now how we represent "matched element(s), or the unmatched original"

## Breaking changes

- `MatchReplace` replaces `Matcher` in `Selector`s: All selector matchers now take `MatchReplace` instead of `Matcher`
- New `crate::run::Error::SelectionError` variant, with an accompanying struct
- `Table::retain_columns_by_header` and `Table::retain_rows` now require an additional error type parameter, representing a possible error coming from the passed-in function

## Review note

This PR is for a feature/integration branch. Please see the contributing commits (each of which has its own PR) for the individual steps.

Resolves #277
1 parent 8eb6409 commit 61a61bc

26 files changed

+1377
-321
lines changed

.github/workflows/formatting.yml

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -3,7 +3,7 @@ on:
33
push:
44
branches: [ "main" ]
55
pull_request:
6-
branches: [ "main" ]
6+
branches: [ "main", "feature/*" ]
77

88
jobs:
99
newlines:

.github/workflows/rust.yml

Lines changed: 2 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -4,7 +4,7 @@ on:
44
push:
55
branches: [ "main" ]
66
pull_request:
7-
branches: [ "main" ]
7+
branches: [ "main", "feature/*" ]
88

99
env:
1010
CARGO_TERM_COLOR: always
@@ -110,7 +110,7 @@ jobs:
110110
run: |
111111
if ! "$RUNNER_TEMP/cargo-semver-checks" semver-checks --baseline-rev "origin/$GITHUB_BASE_REF" ; then
112112
# There were breaking changes. Make sure we have the appropriate label!
113-
breaking_change_label_count="$(gh pr view 364 --json labels | jq '.labels | select("breaking change") | length')"
113+
breaking_change_label_count="$(gh pr view ${{ github.event.number }} --json labels | jq '.labels | map(select(.name == "breaking change")) | length')"
114114
if [[ "$breaking_change_label_count" != 1 ]]; then
115115
echo "::error title=semver-checks::semver-checks found breaking changes, but the 'breaking change' label isn't applied. Please add that label."
116116
exit 1

.github/workflows/system-test.yml

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -3,7 +3,7 @@ on:
33
push:
44
branches: [ "main" ]
55
pull_request:
6-
branches: [ "main" ]
6+
branches: [ "main", "feature/*" ]
77
workflow_dispatch: { }
88

99
jobs:

src/lib.rs

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -50,7 +50,7 @@
5050
//! let selector: mdq::select::Selector = query_text.try_into()?;
5151
//!
5252
//! // Run the selector against the parsed Markdown
53-
//! let (found_nodes, found_nodes_ctx) = selector.find_nodes(parsed_md);
53+
//! let (found_nodes, found_nodes_ctx) = selector.find_nodes(parsed_md)?;
5454
//!
5555
//! // Output. Note our use of
5656
//! let mut output_string = String::new();

src/md_elem/tree_ref.rs

Lines changed: 33 additions & 23 deletions
Original file line numberDiff line numberDiff line change
@@ -37,15 +37,15 @@ mod elem_ref {
3737
}
3838
}
3939

40-
pub fn retain_columns_by_header<F>(&mut self, mut f: F)
40+
pub fn retain_columns_by_header<F, E>(&mut self, mut f: F) -> Result<(), E>
4141
where
42-
F: FnMut(&TableCell) -> bool,
42+
F: FnMut(&TableCell) -> Result<bool, E>,
4343
{
4444
let Some(first_row) = self.rows.first() else {
45-
return;
45+
return Ok(());
4646
};
4747
let mut keeper_indices = IndexKeeper::new();
48-
keeper_indices.retain_when(first_row, |_, cell| f(cell));
48+
keeper_indices.retain_when(first_row, |_, cell| f(cell))?;
4949

5050
match keeper_indices.count_keeps() {
5151
0 => {
@@ -58,24 +58,30 @@ mod elem_ref {
5858
}
5959
_ => {
6060
// some columns match: retain those, and discard the rest
61-
self.alignments.retain_with_index(keeper_indices.retain_fn());
61+
self.alignments.retain_with_index(keeper_indices.retain_fn())?;
6262
for row in self.rows.iter_mut() {
63-
row.retain_with_index(keeper_indices.retain_fn());
63+
row.retain_with_index(keeper_indices.retain_fn())?;
6464
}
6565
}
6666
}
67+
Ok(())
6768
}
6869

69-
pub fn retain_rows<F>(&mut self, mut f: F)
70+
pub fn retain_rows<F, E>(&mut self, mut f: F) -> Result<(), E>
7071
where
71-
F: FnMut(&TableCell) -> bool,
72+
F: FnMut(&TableCell) -> Result<bool, E>,
7273
{
7374
self.rows.retain_with_index(|idx, row| {
7475
if idx == 0 {
75-
return true;
76+
return Ok(true);
7677
}
77-
row.iter().any(&mut f)
78-
});
78+
for cell in row {
79+
if f(cell)? {
80+
return Ok(true);
81+
}
82+
}
83+
Ok(false)
84+
})
7985
}
8086

8187
pub fn is_empty(&self) -> bool {
@@ -102,7 +108,7 @@ mod tests {
102108
vec!["data 1 a", "data 1 b", "data 1 c"],
103109
vec!["data 2 a", "data 2 b", "KEEPER c"],
104110
]);
105-
table.retain_columns_by_header(cell_matches("KEEPER"));
111+
table.retain_columns_by_header(cell_matches("KEEPER")).unwrap();
106112

107113
// note: "KEEPER" is in the last column, but not in the header; only the header gets
108114
// matched.
@@ -123,10 +129,12 @@ mod tests {
123129
table.normalize();
124130

125131
let mut seen_lines = Vec::with_capacity(3);
126-
table.retain_columns_by_header(|line| {
127-
seen_lines.push(simple_to_string(line));
128-
true
129-
});
132+
table
133+
.retain_columns_by_header(|line| {
134+
seen_lines.push(simple_to_string(line));
135+
Ok::<_, ()>(true)
136+
})
137+
.unwrap();
130138

131139
// normalization
132140
assert_eq!(
@@ -154,7 +162,7 @@ mod tests {
154162
vec!["data 1 a", "data 1 b", "data 1 c"],
155163
vec!["data 2 a", "KEEPER b", "data 2 c"],
156164
]);
157-
table.retain_rows(cell_matches("KEEPER"));
165+
table.retain_rows(cell_matches("KEEPER")).unwrap();
158166

159167
assert_eq!(
160168
table.alignments,
@@ -187,10 +195,12 @@ mod tests {
187195
// retain only the rows with empty cells. This lets us get around the short-circuiting
188196
// of retain_rows (it short-circuits within each row as soon as it finds a matching
189197
// cell), to validate that the normalization works as expected.
190-
table.retain_rows(|line| {
191-
seen_lines.push(simple_to_string(line));
192-
line.is_empty()
193-
});
198+
table
199+
.retain_rows(|line| {
200+
seen_lines.push(simple_to_string(line));
201+
Ok::<_, ()>(line.is_empty())
202+
})
203+
.unwrap();
194204

195205
// normalization
196206
assert_eq!(
@@ -219,10 +229,10 @@ mod tests {
219229
);
220230
}
221231

222-
fn cell_matches(substring: &str) -> impl Fn(&TableCell) -> bool + '_ {
232+
fn cell_matches(substring: &str) -> impl Fn(&TableCell) -> Result<bool, ()> + '_ {
223233
move |line| {
224234
let line_str = format!("{:?}", line);
225-
line_str.contains(substring)
235+
Ok(line_str.contains(substring))
226236
}
227237
}
228238

src/query/grammar.pest

Lines changed: 6 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -69,10 +69,15 @@ string = {
6969
asterisk = @{ "*" }
7070
unquoted_string = @{ LETTER ~ (!(PEEK | "$") ~ ANY)* }
7171

72-
regex = ${ "/" ~ regex_char* ~ "/" }
72+
regex = ${
73+
// Put these into a single rule, so that the error message just says "regex" for both the plain and replace variants.
74+
("/" ~ regex_char* ~ "/")
75+
| ("!s/" ~ regex_char* ~ "/" ~ regex_replacement_segment? ~ "/")
76+
}
7377
regex_char = ${
7478
(regex_escaped_slash | regex_normal_char)
7579
}
80+
regex_replacement_segment = ${ regex_char+ }
7681
regex_escaped_slash = @{ "\\/" }
7782
regex_normal_char = @{ !("/") ~ ANY }
7883

src/query/matcher_try_from.rs

Lines changed: 18 additions & 9 deletions
Original file line numberDiff line numberDiff line change
@@ -1,28 +1,34 @@
11
use crate::query::strings::{ParsedString, ParsedStringMode};
22
use crate::query::{DetachedSpan, InnerParseError, Pair};
3-
use crate::select::{Matcher, Regex};
3+
use crate::select::{MatchReplace, Matcher, Regex};
44
use fancy_regex::Error;
55

6-
impl Matcher {
6+
impl MatchReplace {
77
pub(crate) fn try_from(pair: Option<Pair>) -> Result<Self, InnerParseError> {
88
let Some(pair) = pair else {
9-
return Ok(Self::Any { explicit: false });
9+
return Ok(Self {
10+
matcher: Matcher::Any { explicit: false },
11+
replacement: None,
12+
});
1013
};
1114
let span = DetachedSpan::from(&pair);
1215
let parsed_string = ParsedString::new_from_pairs(pair.into_inner())?;
1316
if parsed_string.is_equivalent_to_asterisk() {
14-
return Ok(Self::Any {
15-
explicit: parsed_string.explicit_wildcard,
17+
return Ok(Self {
18+
matcher: Matcher::Any {
19+
explicit: parsed_string.explicit_wildcard,
20+
},
21+
replacement: None,
1622
});
1723
}
1824
let matcher = match parsed_string.mode {
19-
ParsedStringMode::CaseSensitive => Self::Text {
25+
ParsedStringMode::CaseSensitive => Matcher::Text {
2026
case_sensitive: true,
2127
anchor_start: parsed_string.anchor_start,
2228
text: parsed_string.text,
2329
anchor_end: parsed_string.anchor_end,
2430
},
25-
ParsedStringMode::CaseInsensitive => Self::Text {
31+
ParsedStringMode::CaseInsensitive => Matcher::Text {
2632
case_sensitive: false,
2733
anchor_start: parsed_string.anchor_start,
2834
text: parsed_string.text,
@@ -43,9 +49,12 @@ impl Matcher {
4349
}
4450
}
4551
})?;
46-
Self::Regex(Regex { re })
52+
Matcher::Regex(Regex { re })
4753
}
4854
};
49-
Ok(matcher)
55+
Ok(Self {
56+
matcher,
57+
replacement: parsed_string.replace_string,
58+
})
5059
}
5160
}

src/query/pest.rs

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -88,6 +88,7 @@ impl Query {
8888
Rule::regex_char => "regex character",
8989
Rule::regex_escaped_slash => "_/_",
9090
Rule::regex_normal_char => "regex character",
91+
Rule::regex_replacement_segment => "regex_replacement_segment",
9192
Rule::quoted_string => "quoted string",
9293
Rule::quoted_char => "character in quoted string",
9394
Rule::asterisk => "_*_",

0 commit comments

Comments
 (0)