Skip to content

Commit c22c3d1

Browse files
authored
use fancy-regex
- Change the library to use fancy-regex (this uses the old regex crate under the hood).
- Update the error handling: fancy-regex's default error display wasn't very nice as an inner error, so I unwrapped it for nicer display.
- Add a unit test to smoke-test the newly available functionality. I only picked one new bit of functionality (lookahead) because I'm interested in verifying that fancy-regex is being used at all, not in testing the library.

This is not a breaking change, because the regex library that mdq uses is intentionally obscured in the public API.

Resolves #121.
1 parent c7567f2 commit c22c3d1

File tree

7 files changed

+65
-27
lines changed

7 files changed

+65
-27
lines changed

Cargo.lock

Lines changed: 27 additions & 13 deletions
Some generated files are not rendered by default. Learn more about customizing how changed files appear on GitHub.

Cargo.toml

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -18,7 +18,7 @@ memchr = "2.7.4"
1818
paste = "1.0"
1919
pest = "2.8"
2020
pest_derive = { version = "2.8", features = ["grammar-extras"] }
21-
regex = "1.10.4"
21+
fancy-regex = "0.14"
2222
serde = { version = "1", features = ["derive"] }
2323
serde_json = "1.0"
2424

src/query/matcher_try_from.rs

Lines changed: 15 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -1,6 +1,7 @@
11
use crate::query::strings::{ParsedString, ParsedStringMode};
22
use crate::query::{DetachedSpan, InnerParseError, Pair};
33
use crate::select::{Matcher, Regex};
4+
use fancy_regex::Error;
45

56
impl Matcher {
67
pub(crate) fn try_from(pair: Option<Pair>) -> Result<Self, InnerParseError> {
@@ -28,8 +29,20 @@ impl Matcher {
2829
anchor_end: parsed_string.anchor_end,
2930
},
3031
ParsedStringMode::Regex => {
31-
let re =
32-
regex::Regex::new(&parsed_string.text).map_err(|e| InnerParseError::Other(span, e.to_string()))?;
32+
let re = fancy_regex::Regex::new(&parsed_string.text).map_err(|e| {
33+
match e {
34+
Error::ParseError(pos, err) => {
35+
let mut re_span = span;
36+
re_span.start += pos + 1; // +1 for the regex's opening slash
37+
re_span.end = re_span.start;
38+
InnerParseError::Other(re_span, format!("regex parse error: {err}"))
39+
}
40+
err => {
41+
// not expected, but we'll handle it anyway
42+
InnerParseError::Other(span, err.to_string())
43+
}
44+
}
45+
})?;
3346
Self::Regex(Regex { re })
3447
}
3548
};

src/query/selector_try_from.rs

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -646,7 +646,7 @@ mod tests {
646646
"</> /<div.*>/",
647647
Selector::Html(HtmlMatcher {
648648
html: Matcher::Regex(Regex {
649-
re: regex::Regex::new("<div.*>").unwrap(),
649+
re: fancy_regex::Regex::new("<div.*>").unwrap(),
650650
}),
651651
}),
652652
)

src/select/matcher.rs

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -28,7 +28,7 @@ pub enum Matcher {
2828
/// The actual regex library is intentionally obscured so that it can change in the future without breaking the API.
2929
#[derive(Debug, Clone)]
3030
pub struct Regex {
31-
pub(crate) re: regex::Regex,
31+
pub(crate) re: fancy_regex::Regex,
3232
}
3333

3434
impl PartialEq for Regex {

src/select/string_matcher.rs

Lines changed: 17 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -2,7 +2,7 @@ use crate::md_elem::elem::*;
22
use crate::md_elem::*;
33
use crate::output::inlines_to_plain_string;
44
use crate::select::Matcher;
5-
use regex::Regex;
5+
use fancy_regex::Regex;
66
use std::borrow::Borrow;
77

88
#[derive(Debug)]
@@ -18,7 +18,12 @@ impl PartialEq for StringMatcher {
1818

1919
impl StringMatcher {
2020
pub fn matches(&self, haystack: &str) -> bool {
21-
self.re.is_match(haystack)
21+
match self.re.is_match(haystack) {
22+
Ok(m) => m,
23+
Err(e) => {
24+
panic!("failed to evaluate regular expression: {e}");
25+
}
26+
}
2227
}
2328

2429
pub fn matches_inlines<I: Borrow<Inline>>(&self, haystack: &[I]) -> bool {
@@ -106,7 +111,7 @@ impl SubstringToRegex {
106111
if self.anchor_start {
107112
pattern.push('^');
108113
}
109-
pattern.push_str(&regex::escape(&self.look_for));
114+
pattern.push_str(&fancy_regex::escape(&self.look_for));
110115
if self.anchor_end {
111116
pattern.push('$');
112117
}
@@ -315,6 +320,15 @@ mod test {
315320
parse_and_check_with(StringVariant::AngleBracket, "> rest", StringMatcher::any(), "> rest");
316321
}
317322

323+
/// Test for fancy_regex specific feature (lookaround)
324+
#[test]
325+
fn fancy_regex_lookahead() {
326+
let matcher = re(r#"foo(?=bar)"#); // Positive lookahead: matches "foo" only if followed by "bar"
327+
assert!(matcher.matches("foobar"));
328+
assert!(!matcher.matches("foo"));
329+
assert!(!matcher.matches("foobaz"));
330+
}
331+
318332
fn parse_and_check_with(
319333
string_variant: StringVariant,
320334
text: &str,

tests/md_cases/bad_queries.toml

Lines changed: 3 additions & 6 deletions
Original file line numberDiff line numberDiff line change
@@ -61,15 +61,12 @@ cli_args = ['# /\P{/']
6161
expect_success = false
6262
output = ''
6363
output_err = '''Syntax error in select specifier:
64-
--> 1:3
64+
--> 1:4
6565
|
6666
1 | # /\P{/
67-
| ^---^
67+
| ^
6868
|
69-
= regex parse error:
70-
\P{
71-
^
72-
error: incomplete escape sequence, reached end of pattern prematurely
69+
= regex parse error: Unicode escape not closed
7370
'''
7471

7572
[expect."bareword isn't closed"]

0 commit comments

Comments
 (0)