Skip to content

Commit 8aede03

Browse files
committed
Switch to regex-automata
1 parent 10b13f1 commit 8aede03

File tree

7 files changed

+107
-33
lines changed

7 files changed

+107
-33
lines changed

Cargo.lock

Lines changed: 2 additions & 2 deletions
Some generated files are not rendered by default. Learn more about customizing how changed files appear on GitHub.

Cargo.toml

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -26,7 +26,7 @@ libc = "0.2.42"
2626
memmem = "0.1.1"
2727
num_enum = "0.7"
2828
rand = "0.8"
29-
regex = { version = "1.3.6" }
29+
regex-automata = { version = "0.4.9" }
3030
serde = { version = "1.0.113", features = [ "derive" ] }
3131
serde_json = "1.0.56"
3232
sliceslice = "0.4.3"

engine/Cargo.toml

Lines changed: 2 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -26,7 +26,7 @@ cidr.workspace = true
2626
fnv.workspace = true
2727
memmem.workspace = true
2828
rand.workspace = true
29-
regex = { workspace = true, optional = true }
29+
regex-automata = { workspace = true, optional = true }
3030
serde.workspace = true
3131
serde_json.workspace = true
3232
sliceslice.workspace = true
@@ -38,4 +38,5 @@ criterion.workspace = true
3838
indoc.workspace = true
3939

4040
[features]
41+
regex = ["dep:regex-automata"]
4142
default = [ "regex" ]

engine/src/rhs_types/regex/imp_real.rs

Lines changed: 62 additions & 21 deletions
Original file line numberDiff line numberDiff line change
@@ -1,51 +1,92 @@
1-
use crate::{ParserSettings, RegexFormat};
1+
use regex_automata::MatchKind;
22

3-
pub use regex::Error;
3+
use super::Error;
4+
use crate::{ParserSettings, RegexFormat};
5+
use std::ops::Deref;
6+
use std::sync::Arc;
47

5-
/// Wrapper around [`regex::bytes::Regex`]
8+
/// Wrapper around [`regex_automata::meta::Regex`]
69
#[derive(Clone)]
710
pub struct Regex {
8-
compiled_regex: regex::bytes::Regex,
11+
pattern: Arc<str>,
12+
regex: regex_automata::meta::Regex,
913
format: RegexFormat,
1014
}
1115

1216
impl Regex {
17+
/// Retrieves the syntax configuration that will be used to build the regex.
18+
#[inline]
19+
pub fn syntax_config() -> regex_automata::util::syntax::Config {
20+
regex_automata::util::syntax::Config::new()
21+
.unicode(false)
22+
.utf8(false)
23+
}
24+
25+
/// Retrieves the meta configuration that will be used to build the regex.
26+
#[inline]
27+
pub fn meta_config(settings: &ParserSettings) -> regex_automata::meta::Config {
28+
regex_automata::meta::Config::new()
29+
.match_kind(MatchKind::LeftmostFirst)
30+
.utf8_empty(false)
31+
.dfa(false)
32+
.nfa_size_limit(Some(settings.regex_compiled_size_limit))
33+
.onepass_size_limit(Some(settings.regex_compiled_size_limit))
34+
.dfa_size_limit(Some(settings.regex_compiled_size_limit))
35+
.hybrid_cache_capacity(settings.regex_dfa_size_limit)
36+
}
37+
1338
/// Compiles a regular expression.
1439
pub fn new(
1540
pattern: &str,
1641
format: RegexFormat,
1742
settings: &ParserSettings,
1843
) -> Result<Self, Error> {
19-
::regex::bytes::RegexBuilder::new(pattern)
20-
.unicode(false)
21-
.size_limit(settings.regex_compiled_size_limit)
22-
.dfa_size_limit(settings.regex_dfa_size_limit)
23-
.build()
24-
.map(|r| Regex {
25-
compiled_regex: r,
44+
::regex_automata::meta::Builder::new()
45+
.configure(Self::meta_config(settings))
46+
.syntax(Self::syntax_config())
47+
.build(pattern)
48+
.map(|regex| Regex {
49+
pattern: Arc::from(pattern),
50+
regex,
2651
format,
2752
})
53+
.map_err(|err| {
54+
if let Some(limit) = err.size_limit() {
55+
Error::CompiledTooBig(limit)
56+
} else if let Some(syntax) = err.syntax_error() {
57+
Error::Syntax(syntax.to_string())
58+
} else {
59+
unreachable!()
60+
}
61+
})
2862
}
2963

30-
/// Returns true if and only if the regex matches the string given.
31-
pub fn is_match(&self, text: &[u8]) -> bool {
32-
self.compiled_regex.is_match(text)
33-
}
34-
35-
/// Returns the original string of this regex.
64+
/// Returns the pattern of this regex.
65+
#[inline]
3666
pub fn as_str(&self) -> &str {
37-
self.compiled_regex.as_str()
67+
&self.pattern
3868
}
3969

40-
/// Returns the format behind the regex
70+
/// Returns the format used by the pattern.
71+
#[inline]
4172
pub fn format(&self) -> RegexFormat {
4273
self.format
4374
}
4475
}
4576

46-
impl From<Regex> for regex::bytes::Regex {
77+
impl From<Regex> for regex_automata::meta::Regex {
78+
#[inline]
4779
fn from(regex: Regex) -> Self {
48-
regex.compiled_regex
80+
regex.regex
81+
}
82+
}
83+
84+
impl Deref for Regex {
85+
type Target = regex_automata::meta::Regex;
86+
87+
#[inline]
88+
fn deref(&self) -> &Self::Target {
89+
&self.regex
4990
}
5091
}
5192

engine/src/rhs_types/regex/mod.rs

Lines changed: 38 additions & 6 deletions
Original file line numberDiff line numberDiff line change
@@ -3,10 +3,9 @@ use crate::rhs_types::bytes::lex_raw_string_as_str;
33
use crate::FilterParser;
44
use cfg_if::cfg_if;
55
use serde::{Serialize, Serializer};
6-
use std::{
7-
fmt::{self, Debug, Formatter},
8-
hash::{Hash, Hasher},
9-
};
6+
use std::fmt::{self, Debug, Display, Formatter};
7+
use std::hash::{Hash, Hasher};
8+
use thiserror::Error;
109

1110
cfg_if! {
1211
if #[cfg(feature = "regex")] {
@@ -19,7 +18,7 @@ cfg_if! {
1918
}
2019

2120
/// RegexFormat describes the format behind the regex
22-
#[derive(PartialEq, Eq, Copy, Clone)]
21+
#[derive(PartialEq, Eq, Copy, Clone, Debug)]
2322
pub enum RegexFormat {
2423
/// Literal string was used to define the expression
2524
Literal,
@@ -41,9 +40,20 @@ impl Hash for Regex {
4140
}
4241
}
4342

43+
impl Display for Regex {
44+
/// Shows the original regular expression.
45+
fn fmt(&self, f: &mut Formatter<'_>) -> fmt::Result {
46+
write!(f, "{}", self.as_str())
47+
}
48+
}
49+
4450
impl Debug for Regex {
51+
/// Shows the pattern and format of the regular expression.
4552
fn fmt(&self, f: &mut Formatter<'_>) -> fmt::Result {
46-
f.write_str(self.as_str())
53+
f.debug_struct("Regex")
54+
.field("pattern", &self.as_str())
55+
.field("format", &self.format())
56+
.finish()
4757
}
4858
}
4959

@@ -120,6 +130,28 @@ impl Serialize for Regex {
120130
}
121131
}
122132

133+
/// An error that occurred during parsing or compiling a regular expression.
134+
#[non_exhaustive]
135+
#[derive(Clone, Debug, Error, PartialEq)]
136+
pub enum Error {
137+
/// A syntax error.
138+
Syntax(String),
139+
/// The compiled regex exceeded the configured
140+
/// regex compiled size limit.
141+
CompiledTooBig(usize),
142+
}
143+
144+
impl Display for Error {
145+
fn fmt(&self, f: &mut core::fmt::Formatter<'_>) -> core::fmt::Result {
146+
match *self {
147+
Error::Syntax(ref err) => Display::fmt(err, f),
148+
Error::CompiledTooBig(limit) => {
149+
write!(f, "Compiled regex exceeds size limit of {} bytes.", limit)
150+
}
151+
}
152+
}
153+
}
154+
123155
#[cfg(test)]
124156
mod test {
125157
use super::*;

ffi/Cargo.toml

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -25,7 +25,7 @@ wirefilter.workspace = true
2525

2626
[dev-dependencies]
2727
indoc.workspace = true
28-
regex.workspace = true
28+
regex-automata.workspace = true
2929

3030
[build-dependencies]
3131
cbindgen = "0.27"

ffi/src/lib.rs

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -1039,7 +1039,7 @@ pub extern "C" fn wirefilter_get_version() -> StaticRustAllocatedString {
10391039
#[allow(clippy::bool_assert_comparison)]
10401040
mod ffi_test {
10411041
use super::*;
1042-
use regex::Regex;
1042+
use regex_automata::meta::Regex;
10431043
use std::ffi::CStr;
10441044

10451045
impl RustAllocatedString {

0 commit comments

Comments
 (0)