Skip to content
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension


Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
4 changes: 2 additions & 2 deletions Cargo.lock

Some generated files are not rendered by default. Learn more about how customized files appear on GitHub.

2 changes: 1 addition & 1 deletion Cargo.toml
Original file line number Diff line number Diff line change
Expand Up @@ -26,7 +26,7 @@ libc = "0.2.42"
memmem = "0.1.1"
num_enum = "0.7"
rand = "0.8"
regex = { version = "1.3.6" }
regex-automata = { version = "0.4.9" }
serde = { version = "1.0.113", features = [ "derive" ] }
serde_json = "1.0.56"
sliceslice = "0.4.3"
Expand Down
3 changes: 2 additions & 1 deletion engine/Cargo.toml
Original file line number Diff line number Diff line change
Expand Up @@ -26,7 +26,7 @@ cidr.workspace = true
fnv.workspace = true
memmem.workspace = true
rand.workspace = true
regex = { workspace = true, optional = true }
regex-automata = { workspace = true, optional = true }
serde.workspace = true
serde_json.workspace = true
sliceslice.workspace = true
Expand All @@ -38,4 +38,5 @@ criterion.workspace = true
indoc.workspace = true

[features]
regex = ["dep:regex-automata"]
default = [ "regex" ]
83 changes: 62 additions & 21 deletions engine/src/rhs_types/regex/imp_real.rs
Original file line number Diff line number Diff line change
@@ -1,51 +1,92 @@
use crate::{ParserSettings, RegexFormat};
use regex_automata::MatchKind;

pub use regex::Error;
use super::Error;
use crate::{ParserSettings, RegexFormat};
use std::ops::Deref;
use std::sync::Arc;

/// Wrapper around [`regex::bytes::Regex`]
/// Wrapper around [`regex_automata::meta::Regex`]
#[derive(Clone)]
pub struct Regex {
compiled_regex: regex::bytes::Regex,
pattern: Arc<str>,
regex: regex_automata::meta::Regex,
format: RegexFormat,
}

impl Regex {
/// Returns the syntax configuration used when parsing a pattern.
///
/// Both the `unicode` and `utf8` options are switched off.
#[inline]
pub fn syntax_config() -> regex_automata::util::syntax::Config {
    let base = regex_automata::util::syntax::Config::new();
    base.unicode(false).utf8(false)
}

/// Returns the meta-engine configuration used when compiling a pattern.
///
/// `settings.regex_compiled_size_limit` is applied to the NFA, one-pass and
/// DFA size limits alike, while `settings.regex_dfa_size_limit` sets the
/// hybrid cache capacity.
#[inline]
pub fn meta_config(settings: &ParserSettings) -> regex_automata::meta::Config {
    // One limit value is shared by the three `*_size_limit` options below.
    let compiled_limit = Some(settings.regex_compiled_size_limit);
    let hybrid_cache = settings.regex_dfa_size_limit;

    regex_automata::meta::Config::new()
        .match_kind(MatchKind::LeftmostFirst)
        .utf8_empty(false)
        .dfa(false)
        .nfa_size_limit(compiled_limit)
        .onepass_size_limit(compiled_limit)
        .dfa_size_limit(compiled_limit)
        .hybrid_cache_capacity(hybrid_cache)
}

/// Compiles a regular expression.
pub fn new(
pattern: &str,
format: RegexFormat,
settings: &ParserSettings,
) -> Result<Self, Error> {
::regex::bytes::RegexBuilder::new(pattern)
.unicode(false)
.size_limit(settings.regex_compiled_size_limit)
.dfa_size_limit(settings.regex_dfa_size_limit)
.build()
.map(|r| Regex {
compiled_regex: r,
::regex_automata::meta::Builder::new()
.configure(Self::meta_config(settings))
.syntax(Self::syntax_config())
.build(pattern)
.map(|regex| Regex {
pattern: Arc::from(pattern),
regex,
format,
})
.map_err(|err| {
if let Some(limit) = err.size_limit() {
Error::CompiledTooBig(limit)
} else if let Some(syntax) = err.syntax_error() {
Error::Syntax(syntax.to_string())
} else {
unreachable!()
}
})
}

/// Returns `true` if and only if the regex matches the given bytes.
///
/// `text` is a raw byte slice; the call is forwarded to `is_match` on the
/// `compiled_regex` field.
pub fn is_match(&self, text: &[u8]) -> bool {
self.compiled_regex.is_match(text)
}

/// Returns the original string of this regex.
/// Returns the pattern of this regex.
#[inline]
pub fn as_str(&self) -> &str {
self.compiled_regex.as_str()
&self.pattern
}

/// Returns the [`RegexFormat`] stored alongside the pattern.
#[inline]
pub fn format(&self) -> RegexFormat {
self.format
}
}

impl From<Regex> for regex::bytes::Regex {
impl From<Regex> for regex_automata::meta::Regex {
#[inline]
fn from(regex: Regex) -> Self {
regex.compiled_regex
regex.regex
}
}

/// Lets a [`Regex`] be used wherever a reference to the wrapped
/// [`regex_automata::meta::Regex`] is expected, so the inner type's methods
/// can be called on the wrapper directly.
impl Deref for Regex {
type Target = regex_automata::meta::Regex;

/// Borrows the inner compiled regex.
#[inline]
fn deref(&self) -> &Self::Target {
&self.regex
}
}

Expand Down
44 changes: 38 additions & 6 deletions engine/src/rhs_types/regex/mod.rs
Original file line number Diff line number Diff line change
Expand Up @@ -3,10 +3,9 @@ use crate::rhs_types::bytes::lex_raw_string_as_str;
use crate::FilterParser;
use cfg_if::cfg_if;
use serde::{Serialize, Serializer};
use std::{
fmt::{self, Debug, Formatter},
hash::{Hash, Hasher},
};
use std::fmt::{self, Debug, Display, Formatter};
use std::hash::{Hash, Hasher};
use thiserror::Error;

cfg_if! {
if #[cfg(feature = "regex")] {
Expand All @@ -19,7 +18,7 @@ cfg_if! {
}

/// RegexFormat describes the format behind the regex
#[derive(PartialEq, Eq, Copy, Clone)]
#[derive(PartialEq, Eq, Copy, Clone, Debug)]
pub enum RegexFormat {
/// Literal string was used to define the expression
Literal,
Expand All @@ -41,9 +40,20 @@ impl Hash for Regex {
}
}

impl Display for Regex {
    /// Writes the original pattern text exactly as returned by `as_str`.
    fn fmt(&self, f: &mut Formatter<'_>) -> fmt::Result {
        f.write_str(self.as_str())
    }
}

impl Debug for Regex {
/// Shows the original regular expression.
fn fmt(&self, f: &mut Formatter<'_>) -> fmt::Result {
f.write_str(self.as_str())
f.debug_struct("Regex")
.field("pattern", &self.as_str())
.field("format", &self.format())
.finish()
}
}

Expand Down Expand Up @@ -120,6 +130,28 @@ impl Serialize for Regex {
}
}

/// An error that occurred during parsing or compiling a regular expression.
///
/// The enum is `#[non_exhaustive]`, so matches written outside this crate
/// need a wildcard arm.
#[non_exhaustive]
#[derive(Clone, Debug, Error, PartialEq)]
pub enum Error {
/// A syntax error; the payload is the error message text.
Syntax(String),
/// The compiled regex exceeded the configured
/// regex compiled size limit; the payload is that limit.
CompiledTooBig(usize),
}

impl Display for Error {
fn fmt(&self, f: &mut core::fmt::Formatter<'_>) -> core::fmt::Result {
match *self {
Error::Syntax(ref err) => Display::fmt(err, f),
Error::CompiledTooBig(limit) => {
write!(f, "Compiled regex exceeds size limit of {} bytes.", limit)
}
}
}
}

#[cfg(test)]
mod test {
use super::*;
Expand Down
2 changes: 1 addition & 1 deletion ffi/Cargo.toml
Original file line number Diff line number Diff line change
Expand Up @@ -25,7 +25,7 @@ wirefilter.workspace = true

[dev-dependencies]
indoc.workspace = true
regex.workspace = true
regex-automata.workspace = true

[build-dependencies]
cbindgen = "0.27"
Expand Down
2 changes: 1 addition & 1 deletion ffi/src/lib.rs
Original file line number Diff line number Diff line change
Expand Up @@ -1039,7 +1039,7 @@ pub extern "C" fn wirefilter_get_version() -> StaticRustAllocatedString {
#[allow(clippy::bool_assert_comparison)]
mod ffi_test {
use super::*;
use regex::Regex;
use regex_automata::meta::Regex;
use std::ffi::CStr;

impl RustAllocatedString {
Expand Down
Loading