From 6770a7784dec51f5d262e32dbf752e0d53aa9923 Mon Sep 17 00:00:00 2001 From: LingyuCoder Date: Tue, 5 Aug 2025 18:47:31 +0800 Subject: [PATCH 1/4] perf: create native regex first --- Cargo.lock | 1 + .../raw_builtins/raw_lazy_compilation.rs | 2 +- crates/rspack_regex/Cargo.toml | 1 + crates/rspack_regex/src/lib.rs | 223 ++++++++++++++---- crates/rspack_regex/src/napi.rs | 4 +- 5 files changed, 177 insertions(+), 54 deletions(-) diff --git a/Cargo.lock b/Cargo.lock index 5eb5bc73ae37..19e779a0724d 100644 --- a/Cargo.lock +++ b/Cargo.lock @@ -4921,6 +4921,7 @@ version = "0.4.11" dependencies = [ "cow-utils", "napi", + "regex", "regex-syntax 0.8.5", "regress", "rspack_cacheable", diff --git a/crates/rspack_binding_api/src/raw_options/raw_builtins/raw_lazy_compilation.rs b/crates/rspack_binding_api/src/raw_options/raw_builtins/raw_lazy_compilation.rs index e49b931f6aad..c74601f4a637 100644 --- a/crates/rspack_binding_api/src/raw_options/raw_builtins/raw_lazy_compilation.rs +++ b/crates/rspack_binding_api/src/raw_options/raw_builtins/raw_lazy_compilation.rs @@ -69,7 +69,7 @@ impl From for LazyCompilationTest fn from(value: RawLazyCompilationTest) -> Self { match value.0 { Either::A(regex) => Self::Regex( - RspackRegex::with_flags(&regex.source, &regex.flags).unwrap_or_else(|_| { + RspackRegex::with_flags(regex.source(), regex.flags()).unwrap_or_else(|_| { let msg = format!("[lazyCompilation]incorrect regex {regex:?}"); panic!("{msg}"); }), diff --git a/crates/rspack_regex/Cargo.toml b/crates/rspack_regex/Cargo.toml index 2a15993b36ad..628a45996277 100644 --- a/crates/rspack_regex/Cargo.toml +++ b/crates/rspack_regex/Cargo.toml @@ -11,6 +11,7 @@ version.workspace = true [dependencies] cow-utils = { workspace = true } napi = { workspace = true } +regex = { workspace = true } regex-syntax = { workspace = true } regress = { workspace = true, features = ["backend-pikevm", "std"] } rspack_cacheable = { workspace = true } diff --git a/crates/rspack_regex/src/lib.rs
b/crates/rspack_regex/src/lib.rs index 411ee6b481cb..f542c662b5b7 100644 --- a/crates/rspack_regex/src/lib.rs +++ b/crates/rspack_regex/src/lib.rs @@ -8,102 +8,220 @@ use rspack_cacheable::{ cacheable, with::{AsString, AsStringConverter}, }; -use rspack_error::Error; +use rspack_error::{Error, error}; use swc_core::ecma::ast::Regex as SwcRegex; use self::algo::Algo; -/// Using wrapper type required by [TryFrom] trait #[cacheable(with=AsString)] -#[derive(Clone)] -pub struct RspackRegex { - algo: Box, - pub flags: String, - pub source: String, -} - -impl PartialEq for RspackRegex { - fn eq(&self, other: &Self) -> bool { - self.flags == other.flags && self.source == other.source - } -} - -impl Eq for RspackRegex {} - -impl std::hash::Hash for RspackRegex { - fn hash(&self, state: &mut H) { - self.flags.hash(state); - self.source.hash(state); - } -} - -impl Debug for RspackRegex { - fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result { - f.debug_struct("RspackRegex") - .field("flags", &self.flags) - .field("source", &self.source) - .finish() - } +#[derive(Clone, PartialEq, Eq, Hash, Debug)] +pub enum RspackRegex { + Regex(RspackNativeRegex), + Regress(RspackRegressRegex), } impl RspackRegex { #[inline] pub fn test(&self, text: &str) -> bool { - self.algo.test(text) + match self { + Self::Regex(regex) => regex.regex.is_match(text), + Self::Regress(regress) => regress.algo.test(text), + } } #[inline] pub fn global(&self) -> bool { - self.algo.global() + match self { + // return false for native regex otherwise context options will emit warning + // but it is safe to do so because we can not use regex to capture multiple matches + Self::Regex(regex) => regex.flags.contains('g'), + Self::Regress(regress) => regress.algo.global(), + } } #[inline] pub fn sticky(&self) -> bool { - self.algo.sticky() + match self { + Self::Regex(regex) => regex.flags.contains('y'), + Self::Regress(regress) => regress.algo.sticky(), + } } #[inline] pub fn source(&self) -> &str { 
- &self.source + match self { + Self::Regex(regex) => &regex.source, + Self::Regress(regress) => &regress.source, + } } #[inline] pub fn flags(&self) -> &str { - &self.flags + match self { + Self::Regex(regex) => &regex.flags, + Self::Regress(regress) => &regress.flags, + } } #[inline] pub fn new(expr: &str) -> Result { - Self::with_flags(expr, "") + match RspackNativeRegex::with_flags(expr, "") { + Ok(regex) => Ok(Self::Regex(regex)), + Err(e) => { + println!("create native regex failed: {expr} {e}"); + let regress = RspackRegressRegex::with_flags(expr, "")?; + Ok(Self::Regress(regress)) + } + } } pub fn with_flags(expr: &str, flags: &str) -> Result { - let mut chars = flags.chars().collect::>(); - chars.sort_unstable(); - Ok(Self { - flags: chars.into_iter().collect::(), - source: expr.to_string(), - algo: Box::new(Algo::new(expr, flags)?), - }) + match RspackNativeRegex::with_flags(expr, flags) { + Ok(regex) => Ok(Self::Regex(regex)), + Err(e) => { + println!("create native regex failed: {expr} with {flags} {e}"); + let regress = RspackRegressRegex::with_flags(expr, flags)?; + Ok(Self::Regress(regress)) + } + } } // https://github.com/webpack/webpack/blob/4baf1c075d59babd028f8201526cb8c4acfd24a0/lib/dependencies/ContextDependency.js#L30 #[inline] pub fn to_source_string(&self) -> String { - format!("/{}/{}", self.source, self.flags) + match self { + Self::Regex(regex) => format!("/{}/{}", regex.source, regex.flags), + Self::Regress(regress) => format!("/{}/{}", regress.source, regress.flags), + } } // https://github.com/webpack/webpack/blob/4baf1c075d59babd028f8201526cb8c4acfd24a0/lib/ContextModule.js#L192 #[inline] pub fn to_pretty_string(&self, strip_slash: bool) -> String { - if strip_slash { - format!("{}{}", self.source, self.flags) + let res = if strip_slash { + match self { + Self::Regex(regex) => format!("{}{}", regex.source, regex.flags), + Self::Regress(regress) => format!("{}{}", regress.source, regress.flags), + } } else { self.to_source_string() + }; + + res +
.cow_replace('!', "%21") + .cow_replace('|', "%7C") + .into_owned() + } +} + +#[derive(Clone, Debug)] +pub struct RspackNativeRegex { + regex: regex::Regex, + flags: String, + source: String, +} + +impl std::hash::Hash for RspackNativeRegex { + fn hash(&self, state: &mut H) { + self.flags.hash(state); + self.source.hash(state); + } +} + +impl PartialEq for RspackNativeRegex { + fn eq(&self, other: &Self) -> bool { + self.flags == other.flags && self.source == other.source + } +} + +impl Eq for RspackNativeRegex {} + +impl RspackNativeRegex { + pub fn with_flags(expr: &str, raw_flags: &str) -> Result { + let pattern = expr.replace("\\/", "/"); + + let mut flags = raw_flags.chars().collect::>(); + flags.sort_unstable(); + let mut applied_flags = String::new(); + // https://github.com/vercel/next.js/blob/203adbd5d054609812d1f3666184875dcca13f3a/turbopack/crates/turbo-esregex/src/lib.rs#L71-L94 + for flag in &flags { + match flag { + // indices for substring matches: not relevant for the regex itself + 'd' => {} + // global: default in rust, ignore + 'g' => {} + // case-insensitive: letters match both upper and lower case + 'i' => applied_flags.push('i'), + // multi-line mode: ^ and $ match begin/end of line + 'm' => applied_flags.push('m'), + // allow . to match \n + 's' => applied_flags.push('s'), + // Unicode support (enabled by default) + 'u' => applied_flags.push('u'), + // sticky search: not relevant for the regex itself + 'y' => {} + _ => { + return Err(error!( + "unsupported flag `{flag}` in regex: `{pattern}` with flags: `{raw_flags}`" + )); + } + } } - .cow_replace('!', "%21") - .cow_replace('|', "%7C") - .into_owned() + + let regex = if applied_flags.is_empty() { + regex::Regex::new(&pattern).map_err(|e| error!(e))? + } else { + regex::Regex::new(&format!("(?{applied_flags}){pattern}")).map_err(|e| error!(e))? 
+ }; + + Ok(Self { + regex, + flags: flags.into_iter().collect::(), + source: expr.to_string(), + }) + } +} + +#[derive(Clone)] +pub struct RspackRegressRegex { + algo: Box, + pub flags: String, + pub source: String, +} + +impl PartialEq for RspackRegressRegex { + fn eq(&self, other: &Self) -> bool { + self.flags == other.flags && self.source == other.source + } +} + +impl Eq for RspackRegressRegex {} + +impl std::hash::Hash for RspackRegressRegex { + fn hash(&self, state: &mut H) { + self.flags.hash(state); + self.source.hash(state); + } +} + +impl Debug for RspackRegressRegex { + fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result { + f.debug_struct("RspackRegressRegex") + .field("flags", &self.flags) + .field("source", &self.source) + .finish() + } +} + +impl RspackRegressRegex { + pub fn with_flags(expr: &str, flags: &str) -> Result { + let mut chars = flags.chars().collect::>(); + chars.sort_unstable(); + Ok(Self { + flags: chars.into_iter().collect::(), + source: expr.to_string(), + algo: Box::new(Algo::new(expr, flags)?), + }) } } @@ -125,7 +243,10 @@ impl TryFrom for RspackRegex { impl AsStringConverter for RspackRegex { fn to_string(&self) -> Result { - Ok(format!("{}#{}", self.flags, self.source)) + match self { + Self::Regex(regex) => Ok(format!("{}#{}", regex.flags, regex.source)), + Self::Regress(regress) => Ok(format!("{}#{}", regress.flags, regress.source)), + } } fn from_str(s: &str) -> Result where diff --git a/crates/rspack_regex/src/napi.rs b/crates/rspack_regex/src/napi.rs index 1a8e61c77ad1..bc68702c34b6 100644 --- a/crates/rspack_regex/src/napi.rs +++ b/crates/rspack_regex/src/napi.rs @@ -60,8 +60,8 @@ impl ToNapiValue for RspackRegex { let global = env.get_global()?; let regex = global.get_named_property::>("RegExp")?; - let flags = env.create_string(&val.flags)?; - let source = env.create_string(&val.source)?; + let flags = env.create_string(val.flags())?; + let source = env.create_string(val.source())?; 
Ok(regex.new_instance((source, flags))?.raw()) } From 82e14a84ea2777f42bd127d8917f1dddb79aefb3 Mon Sep 17 00:00:00 2001 From: LingyuCoder Date: Fri, 8 Aug 2025 14:47:12 +0800 Subject: [PATCH 2/4] perf: try to create native regex first --- .../snapshots/defaults__default_options.snap | 8 +- crates/rspack_regex/src/algo.rs | 170 ------------- crates/rspack_regex/src/lib.rs | 232 +++++------------- crates/rspack_regex/src/native.rs | 54 ++++ crates/rspack_regex/src/regress.rs | 23 ++ 5 files changed, 152 insertions(+), 335 deletions(-) delete mode 100644 crates/rspack_regex/src/algo.rs create mode 100644 crates/rspack_regex/src/native.rs create mode 100644 crates/rspack_regex/src/regress.rs diff --git a/crates/rspack/tests/snapshots/defaults__default_options.snap b/crates/rspack/tests/snapshots/defaults__default_options.snap index 64b979b9f24a..bbc4c7a8184a 100644 --- a/crates/rspack/tests/snapshots/defaults__default_options.snap +++ b/crates/rspack/tests/snapshots/defaults__default_options.snap @@ -1,6 +1,5 @@ --- source: crates/rspack/tests/defaults.rs -assertion_line: 16 expression: options --- CompilerOptions { @@ -1404,6 +1403,13 @@ CompilerOptions { ), wrapped_context_reg_exp: Some( RspackRegex { + regex: Native( + RspackNativeRegex( + Regex( + ".*", + ), + ), + ), flags: "", source: ".*", }, diff --git a/crates/rspack_regex/src/algo.rs b/crates/rspack_regex/src/algo.rs deleted file mode 100644 index 3df4d3641bda..000000000000 --- a/crates/rspack_regex/src/algo.rs +++ /dev/null @@ -1,170 +0,0 @@ -use std::{fmt::Debug, hash::Hash}; - -use regex_syntax::hir::{Hir, HirKind, Look, literal::ExtractKind}; -use regress::Match; -use rspack_error::{Error, error}; - -#[derive(Clone)] -pub struct HashRegressRegex { - pub regex: regress::Regex, - expr: String, - flags: String, -} - -impl Hash for HashRegressRegex { - fn hash(&self, state: &mut H) { - self.expr.hash(state); - self.flags.hash(state) - } -} - -impl Debug for HashRegressRegex { - fn fmt(&self, f: &mut 
std::fmt::Formatter<'_>) -> std::fmt::Result { - Debug::fmt(&self.regex, f) - } -} - -impl HashRegressRegex { - pub(crate) fn new(expr: &str, flags: &str) -> Result { - match regress::Regex::with_flags(expr, flags) { - Ok(regex) => Ok(Self { - regex, - expr: expr.to_string(), - flags: flags.to_string(), - }), - Err(err) => Err(error!( - "Can't construct regex `/{expr}/{flags}`, original error message: {err}" - )), - } - } - - fn find(&self, text: &str) -> Option { - self.regex.find(text) - } -} - -#[derive(Clone, Debug, Hash)] -pub enum Algo { - /// Regress is considered having the same behaviors as RegExp in JS. - /// But Regress has poor performance. To improve performance of regex matching, - /// we would try to use some fast algo to do matching, when we detect some special pattern. - /// See details at https://github.com/web-infra-dev/rspack/pull/3113 - EndWith { - pats: Vec, - }, - Regress(HashRegressRegex), -} - -impl Algo { - pub(crate) fn new(expr: &str, flags: &str) -> Result { - let ignore_case = flags.contains('i') || flags.contains('g') || flags.contains('y'); - if let Some(algo) = Self::try_compile_to_end_with_fast_path(expr) - && !ignore_case - { - Ok(algo) - } else { - match HashRegressRegex::new(expr, flags) { - Ok(regex) => Ok(Algo::Regress(regex)), - Err(e) => Err(e), - } - } - } - - fn try_compile_to_end_with_fast_path(expr: &str) -> Option { - let hir = regex_syntax::parse(expr).ok()?; - let seq = regex_syntax::hir::literal::Extractor::new() - .kind(ExtractKind::Suffix) - .extract(&hir); - if is_ends_with_regex(&hir) && seq.is_exact() { - let pats = seq - .literals()? 
- .iter() - .map(|item| String::from_utf8_lossy(item.as_bytes()).to_string()) - .collect::>(); - - Some(Algo::EndWith { pats }) - } else { - None - } - } - - pub(crate) fn test(&self, str: &str) -> bool { - match self { - Algo::Regress(regex) => regex.find(str).is_some(), - Algo::EndWith { pats } => pats.iter().any(|pat| str.ends_with(pat)), - } - } - - pub(crate) fn global(&self) -> bool { - match self { - Algo::Regress(reg) => reg.flags.contains('g'), - Algo::EndWith { .. } => false, - } - } - - pub(crate) fn sticky(&self) -> bool { - match self { - Algo::Regress(reg) => reg.flags.contains('y'), - Algo::EndWith { .. } => false, - } - } -} - -fn is_ends_with_regex(hir: &Hir) -> bool { - if let HirKind::Concat(list) = hir.kind() { - list[0].kind() != &HirKind::Look(Look::Start) - && list[list.len() - 1].kind() == &HirKind::Look(Look::End) - } else { - false - } -} - -#[cfg(test)] -mod test_algo { - use super::*; - - impl Algo { - fn end_with_pats(&self) -> std::collections::HashSet<&str> { - match self { - Algo::EndWith { pats } => pats.iter().map(|s| s.as_str()).collect(), - Algo::Regress(_) => panic!("expect EndWith"), - } - } - - fn is_end_with(&self) -> bool { - matches!(self, Self::EndWith { .. 
}) - } - - fn is_regress(&self) -> bool { - matches!(self, Self::Regress(..)) - } - } - - #[test] - fn should_use_end_with_algo_with_i_flag() { - assert!(Algo::new("\\.js$", "").unwrap().is_end_with()); - assert!(!Algo::new("\\.js$", "i").unwrap().is_end_with()); - } - - #[test] - fn correct_end_with() { - use std::collections::HashSet; - let algo = Algo::new("\\.js$", "").unwrap(); - assert_eq!(algo.end_with_pats(), HashSet::from([".js"])); - let algo = Algo::new("\\.(jsx?|tsx?)$", "").unwrap(); - assert_eq!( - algo.end_with_pats(), - HashSet::from([".jsx", ".tsx", ".js", ".ts"]) - ); - let algo = Algo::new("\\.(svg|png)$", "").unwrap(); - assert_eq!(algo.end_with_pats(), HashSet::from([".svg", ".png"])); - } - - #[test] - fn check_slow_path() { - // this is a full match - assert!(Algo::new("^\\.(svg|png)$", "").unwrap().is_regress()); - // wildcard match - assert!(Algo::new("\\..(svg|png)$", "").unwrap().is_regress()); - } -} diff --git a/crates/rspack_regex/src/lib.rs b/crates/rspack_regex/src/lib.rs index f542c662b5b7..e0827aefd052 100644 --- a/crates/rspack_regex/src/lib.rs +++ b/crates/rspack_regex/src/lib.rs @@ -1,87 +1,111 @@ -mod algo; mod napi; +mod native; +mod regress; use std::fmt::Debug; use cow_utils::CowUtils; +use native::RspackNativeRegex; +use regress::RspackRegressRegex; use rspack_cacheable::{ cacheable, with::{AsString, AsStringConverter}, }; -use rspack_error::{Error, error}; +use rspack_error::Error; use swc_core::ecma::ast::Regex as SwcRegex; -use self::algo::Algo; +#[derive(Debug, Clone)] +pub enum RspackRegexImpl { + Native(RspackNativeRegex), + Regress(RspackRegressRegex), +} + +impl RspackRegexImpl { + pub fn test(&self, text: &str) -> bool { + match self { + Self::Native(regex) => regex.test(text), + Self::Regress(regex) => regex.test(text), + } + } +} #[cacheable(with=AsString)] -#[derive(Clone, PartialEq, Eq, Hash, Debug)] -pub enum RspackRegex { - Regex(RspackNativeRegex), - Regress(RspackRegressRegex), +#[derive(Debug, Clone)] +pub 
struct RspackRegex { + pub regex: RspackRegexImpl, + pub flags: String, + pub source: String, +} + +impl PartialEq for RspackRegex { + fn eq(&self, other: &Self) -> bool { + self.flags == other.flags && self.source == other.source && self.r#type() == other.r#type() + } +} + +impl Eq for RspackRegex {} + +impl std::hash::Hash for RspackRegex { + fn hash(&self, state: &mut H) { + self.flags.hash(state); + self.source.hash(state); + self.r#type().hash(state); + } } impl RspackRegex { #[inline] - pub fn test(&self, text: &str) -> bool { - match self { - Self::Regex(regex) => regex.regex.is_match(text), - Self::Regress(regress) => regress.algo.test(text), + pub fn r#type(&self) -> String { + match self.regex { + RspackRegexImpl::Native(_) => "native".to_string(), + RspackRegexImpl::Regress(_) => "regress".to_string(), } } + #[inline] + pub fn test(&self, text: &str) -> bool { + self.regex.test(text) + } + #[inline] pub fn global(&self) -> bool { - match self { - // return false for native regex otherwise context options will emit warning - // but it is safe to do so because we can not use regex to capture multiple matches - Self::Regex(regex) => regex.flags.contains('g'), - Self::Regress(regress) => regress.algo.global(), - } + self.flags.contains('g') } #[inline] pub fn sticky(&self) -> bool { - match self { - Self::Regex(regex) => regex.flags.contains('y'), - Self::Regress(regress) => regress.algo.sticky(), - } + self.flags.contains('y') } #[inline] pub fn source(&self) -> &str { - match self { - Self::Regex(regex) => &regex.source, - Self::Regress(regress) => &regress.source, - } + &self.source } #[inline] pub fn flags(&self) -> &str { - match self { - Self::Regex(regex) => &regex.flags, - Self::Regress(regress) => &regress.flags, - } + &self.flags } #[inline] pub fn new(expr: &str) -> Result { - match RspackNativeRegex::with_flags(expr, "") { - Ok(regex) => Ok(Self::Regex(regex)), - Err(e) => { - println!("create native regex failed: {expr} {e}"); - let regress =
RspackRegressRegex::with_flags(expr, "")?; - Ok(Self::Regress(regress)) - } - } + Self::with_flags(expr, "") } pub fn with_flags(expr: &str, flags: &str) -> Result { match RspackNativeRegex::with_flags(expr, flags) { - Ok(regex) => Ok(Self::Regex(regex)), - Err(e) => { - println!("create native regex failed: {expr} with {flags} {e}"); + Ok(regex) => Ok(Self { + regex: RspackRegexImpl::Native(regex), + flags: flags.to_string(), + source: expr.to_string(), + }), + Err(_) => { let regress = RspackRegressRegex::with_flags(expr, flags)?; - Ok(Self::Regress(regress)) + Ok(Self { + regex: RspackRegexImpl::Regress(regress), + flags: flags.to_string(), + source: expr.to_string(), + }) } } } @@ -89,20 +113,14 @@ impl RspackRegex { // https://github.com/webpack/webpack/blob/4baf1c075d59babd028f8201526cb8c4acfd24a0/lib/dependencies/ContextDependency.js#L30 #[inline] pub fn to_source_string(&self) -> String { - match self { - Self::Regex(regex) => format!("/{}/{}", regex.source, regex.flags), - Self::Regress(regress) => format!("/{}/{}", regress.source, regress.flags), - } + format!("/{}/{}", self.source, self.flags) } // https://github.com/webpack/webpack/blob/4baf1c075d59babd028f8201526cb8c4acfd24a0/lib/ContextModule.js#L192 #[inline] pub fn to_pretty_string(&self, strip_slash: bool) -> String { let res = if strip_slash { - match self { - Self::Regex(regex) => format!("{}{}", regex.source, regex.flags), - Self::Regress(regress) => format!("{}{}", regress.source, regress.flags), - } + format!("{}{}", self.source, self.flags) } else { self.to_source_string() }; @@ -114,117 +132,6 @@ impl RspackRegex { } } -#[derive(Clone, Debug)] -pub struct RspackNativeRegex { - regex: regex::Regex, - flags: String, - source: String, -} - -impl std::hash::Hash for RspackNativeRegex { - fn hash(&self, state: &mut H) { - self.flags.hash(state); - self.source.hash(state); - } -} - -impl PartialEq for RspackNativeRegex { - fn eq(&self, other: &Self) -> bool { - self.flags == other.flags && 
self.source == other.source - } -} - -impl Eq for RspackNativeRegex {} - -impl RspackNativeRegex { - pub fn with_flags(expr: &str, raw_flags: &str) -> Result { - let pattern = expr.replace("\\/", "/"); - - let mut flags = raw_flags.chars().collect::>(); - flags.sort_unstable(); - let mut applied_flags = String::new(); - // https://github.com/vercel/next.js/blob/203adbd5d054609812d1f3666184875dcca13f3a/turbopack/crates/turbo-esregex/src/lib.rs#L71-L94 - for flag in &flags { - match flag { - // indices for substring matches: not relevant for the regex itself - 'd' => {} - // global: default in rust, ignore - 'g' => {} - // case-insensitive: letters match both upper and lower case - 'i' => applied_flags.push('i'), - // multi-line mode: ^ and $ match begin/end of line - 'm' => applied_flags.push('m'), - // allow . to match \n - 's' => applied_flags.push('s'), - // Unicode support (enabled by default) - 'u' => applied_flags.push('u'), - // sticky search: not relevant for the regex itself - 'y' => {} - _ => { - return Err(error!( - "unsupported flag `{flag}` in regex: `{pattern}` with flags: `{raw_flags}`" - )); - } - } - } - - let regex = if applied_flags.is_empty() { - regex::Regex::new(&pattern).map_err(|e| error!(e))? - } else { - regex::Regex::new(&format!("(?{applied_flags}){pattern}")).map_err(|e| error!(e))? 
- }; - - Ok(Self { - regex, - flags: flags.into_iter().collect::(), - source: expr.to_string(), - }) - } -} - -#[derive(Clone)] -pub struct RspackRegressRegex { - algo: Box, - pub flags: String, - pub source: String, -} - -impl PartialEq for RspackRegressRegex { - fn eq(&self, other: &Self) -> bool { - self.flags == other.flags && self.source == other.source - } -} - -impl Eq for RspackRegressRegex {} - -impl std::hash::Hash for RspackRegressRegex { - fn hash(&self, state: &mut H) { - self.flags.hash(state); - self.source.hash(state); - } -} - -impl Debug for RspackRegressRegex { - fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result { - f.debug_struct("RspackRegressRegex") - .field("flags", &self.flags) - .field("source", &self.source) - .finish() - } -} - -impl RspackRegressRegex { - pub fn with_flags(expr: &str, flags: &str) -> Result { - let mut chars = flags.chars().collect::>(); - chars.sort_unstable(); - Ok(Self { - flags: chars.into_iter().collect::(), - source: expr.to_string(), - algo: Box::new(Algo::new(expr, flags)?), - }) - } -} - impl TryFrom<&SwcRegex> for RspackRegex { type Error = Error; @@ -243,10 +150,7 @@ impl TryFrom for RspackRegex { impl AsStringConverter for RspackRegex { fn to_string(&self) -> Result { - match self { - Self::Regex(regex) => Ok(format!("{}#{}", regex.flags, regex.source)), - Self::Regress(regress) => Ok(format!("{}#{}", regress.flags, regress.source)), - } + Ok(format!("{}#{}", self.flags, self.source)) } fn from_str(s: &str) -> Result where diff --git a/crates/rspack_regex/src/native.rs b/crates/rspack_regex/src/native.rs new file mode 100644 index 000000000000..3e6ef1835d2c --- /dev/null +++ b/crates/rspack_regex/src/native.rs @@ -0,0 +1,54 @@ +use std::fmt::Debug; + +use cow_utils::CowUtils; +use regex::Regex; +use rspack_error::{Error, error}; + +#[derive(Debug, Clone)] +pub struct RspackNativeRegex(pub Regex); + +impl RspackNativeRegex { + pub fn test(&self, text: &str) -> bool { + self.0.is_match(text) + 
} + + pub fn with_flags(expr: &str, raw_flags: &str) -> Result { + let pattern = expr.cow_replace("\\/", "/"); + + let mut flags = raw_flags.chars().collect::>(); + flags.sort_unstable(); + let mut applied_flags = String::new(); + // https://github.com/vercel/next.js/blob/203adbd5d054609812d1f3666184875dcca13f3a/turbopack/crates/turbo-esregex/src/lib.rs#L71-L94 + for flag in &flags { + match flag { + // indices for substring matches: not relevant for the regex itself + 'd' => {} + // global: default in rust, ignore + 'g' => {} + // case-insensitive: letters match both upper and lower case + 'i' => applied_flags.push('i'), + // multi-line mode: ^ and $ match begin/end of line + 'm' => applied_flags.push('m'), + // allow . to match \n + 's' => applied_flags.push('s'), + // Unicode support (enabled by default) + 'u' => applied_flags.push('u'), + // sticky search: not relevant for the regex itself + 'y' => {} + _ => { + return Err(error!( + "unsupported flag `{flag}` in regex: `{pattern}` with flags: `{raw_flags}`" + )); + } + } + } + + let regex = if applied_flags.is_empty() { + Regex::new(&pattern).map_err(|e| error!(e))? + } else { + Regex::new(format!("(?{applied_flags}){pattern}").as_str()).map_err(|e| error!(e))? 
+ }; + + Ok(Self(regex)) + } +} diff --git a/crates/rspack_regex/src/regress.rs b/crates/rspack_regex/src/regress.rs new file mode 100644 index 000000000000..fe6ffcc268e0 --- /dev/null +++ b/crates/rspack_regex/src/regress.rs @@ -0,0 +1,23 @@ +use regress::Regex; +use rspack_error::{Error, error}; + +#[derive(Debug, Clone)] +pub struct RspackRegressRegex(pub Regex); + +impl RspackRegressRegex { + pub fn test(&self, text: &str) -> bool { + self.0.find(text).is_some() + } + pub fn with_flags(source: &str, flags: &str) -> Result { + let mut chars = flags.chars().collect::>(); + chars.sort_unstable(); + let regex = match Regex::with_flags(source, flags) { + Ok(regex) => Ok(regex), + Err(err) => Err(error!( + "Can't construct regex `/{source}/{flags}`, original error message: {err}" + )), + }?; + + Ok(Self(regex)) + } +} From 28dd463ac8c112b8b1c45c7490de8bdce7bd8b38 Mon Sep 17 00:00:00 2001 From: LingyuCoder Date: Fri, 8 Aug 2025 14:56:25 +0800 Subject: [PATCH 3/4] perf: try to create native regex first --- Cargo.lock | 1 - Cargo.toml | 32 ++++++++++++++------------------ crates/rspack_regex/Cargo.toml | 1 - 3 files changed, 14 insertions(+), 20 deletions(-) diff --git a/Cargo.lock b/Cargo.lock index 19e779a0724d..1c4aacc3d435 100644 --- a/Cargo.lock +++ b/Cargo.lock @@ -4922,7 +4922,6 @@ dependencies = [ "cow-utils", "napi", "regex", - "regex-syntax 0.8.5", "regress", "rspack_cacheable", "rspack_error", diff --git a/Cargo.toml b/Cargo.toml index af22e535761b..c9d21639d386 100644 --- a/Cargo.toml +++ b/Cargo.toml @@ -79,7 +79,6 @@ prost = { version = "0.13", default-features = false } quote = { version = "1.0.38", default-features = false } rayon = { version = "1.10.0", default-features = false } regex = { version = "1.11.1", default-features = false } -regex-syntax = { version = "0.8.5", default-features = false, features = ["std"] } regress = { version = "0.10.4", default-features = false, features = ["pattern"] } ropey = { version = "1.6.1", default-features = false 
} rspack_resolver = { features = ["package_json_raw_json_api", "yarn_pnp"], version = "0.6.2", default-features = false } @@ -123,20 +122,20 @@ inventory = { version = "0.3.17", default-features = false } rkyv = { version = "=0.8.8", default-features = false, features = ["std", "bytecheck"] } # Must be pinned with the same swc versions -pnp = { version = "0.12.1", default-features = false } -swc = { version = "34.0.0", default-features = false } -swc_config = { version = "3.1.1", default-features = false } -swc_core = { version = "35.0.0", default-features = false, features = ["parallel_rayon"] } -swc_ecma_lexer = { version = "22.0.0", default-features = false } -swc_ecma_minifier = { version = "29.0.0", default-features = false } -swc_error_reporters = { version = "16.0.1", default-features = false } -swc_html = { version = "25.0.0", default-features = false } -swc_html_minifier = { version = "29.0.0", default-features = false } -swc_node_comments = { version = "14.0.0", default-features = false } -swc_plugin_runner = { version = "18.0.0", default-features = false } - -wasmtime = { version = "35.0.0", default-features = false } -wasi-common = { version = "35.0.0", default-features = false } +pnp = { version = "0.12.1", default-features = false } +swc = { version = "34.0.0", default-features = false } +swc_config = { version = "3.1.1", default-features = false } +swc_core = { version = "35.0.0", default-features = false, features = ["parallel_rayon"] } +swc_ecma_lexer = { version = "22.0.0", default-features = false } +swc_ecma_minifier = { version = "29.0.0", default-features = false } +swc_error_reporters = { version = "16.0.1", default-features = false } +swc_html = { version = "25.0.0", default-features = false } +swc_html_minifier = { version = "29.0.0", default-features = false } +swc_node_comments = { version = "14.0.0", default-features = false } +swc_plugin_runner = { version = "18.0.0", default-features = false } + +wasi-common = { version = "35.0.0", 
default-features = false } +wasmtime = { version = "35.0.0", default-features = false } rspack_dojang = { version = "0.1.11", default-features = false } @@ -268,9 +267,6 @@ strip = "none" [profile.release.package] -[profile.release.package.regex-syntax] -opt-level = "s" - [profile.release.package.swc_ecma_transforms_module] opt-level = "s" diff --git a/crates/rspack_regex/Cargo.toml b/crates/rspack_regex/Cargo.toml index 628a45996277..5c335ccdbc3c 100644 --- a/crates/rspack_regex/Cargo.toml +++ b/crates/rspack_regex/Cargo.toml @@ -12,7 +12,6 @@ version.workspace = true cow-utils = { workspace = true } napi = { workspace = true } regex = { workspace = true } -regex-syntax = { workspace = true } regress = { workspace = true, features = ["backend-pikevm", "std"] } rspack_cacheable = { workspace = true } rspack_error = { workspace = true } From 85d2522f04216a71e1b85954757431ce8dbb144a Mon Sep 17 00:00:00 2001 From: LingyuCoder Date: Fri, 8 Aug 2025 16:11:08 +0800 Subject: [PATCH 4/4] perf: try to create native regex first --- crates/rspack_regex/src/native.rs | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/crates/rspack_regex/src/native.rs b/crates/rspack_regex/src/native.rs index 3e6ef1835d2c..3e6564a3436b 100644 --- a/crates/rspack_regex/src/native.rs +++ b/crates/rspack_regex/src/native.rs @@ -13,7 +13,7 @@ impl RspackNativeRegex { } pub fn with_flags(expr: &str, raw_flags: &str) -> Result { - let pattern = expr.cow_replace("\\/", "/"); + let pattern = expr.cow_replace("\\\\", "\\"); let mut flags = raw_flags.chars().collect::>(); flags.sort_unstable();