diff --git a/Cargo.lock b/Cargo.lock index 5eb5bc73ae37..1c4aacc3d435 100644 --- a/Cargo.lock +++ b/Cargo.lock @@ -4921,7 +4921,7 @@ version = "0.4.11" dependencies = [ "cow-utils", "napi", - "regex-syntax 0.8.5", + "regex", "regress", "rspack_cacheable", "rspack_error", diff --git a/Cargo.toml b/Cargo.toml index af22e535761b..c9d21639d386 100644 --- a/Cargo.toml +++ b/Cargo.toml @@ -79,7 +79,6 @@ prost = { version = "0.13", default-features = false } quote = { version = "1.0.38", default-features = false } rayon = { version = "1.10.0", default-features = false } regex = { version = "1.11.1", default-features = false } -regex-syntax = { version = "0.8.5", default-features = false, features = ["std"] } regress = { version = "0.10.4", default-features = false, features = ["pattern"] } ropey = { version = "1.6.1", default-features = false } rspack_resolver = { features = ["package_json_raw_json_api", "yarn_pnp"], version = "0.6.2", default-features = false } @@ -123,20 +122,20 @@ inventory = { version = "0.3.17", default-features = false } rkyv = { version = "=0.8.8", default-features = false, features = ["std", "bytecheck"] } # Must be pinned with the same swc versions -pnp = { version = "0.12.1", default-features = false } -swc = { version = "34.0.0", default-features = false } -swc_config = { version = "3.1.1", default-features = false } -swc_core = { version = "35.0.0", default-features = false, features = ["parallel_rayon"] } -swc_ecma_lexer = { version = "22.0.0", default-features = false } -swc_ecma_minifier = { version = "29.0.0", default-features = false } -swc_error_reporters = { version = "16.0.1", default-features = false } -swc_html = { version = "25.0.0", default-features = false } -swc_html_minifier = { version = "29.0.0", default-features = false } -swc_node_comments = { version = "14.0.0", default-features = false } -swc_plugin_runner = { version = "18.0.0", default-features = false } - -wasmtime = { version = "35.0.0", default-features = false } 
-wasi-common = { version = "35.0.0", default-features = false } +pnp = { version = "0.12.1", default-features = false } +swc = { version = "34.0.0", default-features = false } +swc_config = { version = "3.1.1", default-features = false } +swc_core = { version = "35.0.0", default-features = false, features = ["parallel_rayon"] } +swc_ecma_lexer = { version = "22.0.0", default-features = false } +swc_ecma_minifier = { version = "29.0.0", default-features = false } +swc_error_reporters = { version = "16.0.1", default-features = false } +swc_html = { version = "25.0.0", default-features = false } +swc_html_minifier = { version = "29.0.0", default-features = false } +swc_node_comments = { version = "14.0.0", default-features = false } +swc_plugin_runner = { version = "18.0.0", default-features = false } + +wasi-common = { version = "35.0.0", default-features = false } +wasmtime = { version = "35.0.0", default-features = false } rspack_dojang = { version = "0.1.11", default-features = false } @@ -268,9 +267,6 @@ strip = "none" [profile.release.package] -[profile.release.package.regex-syntax] -opt-level = "s" - [profile.release.package.swc_ecma_transforms_module] opt-level = "s" diff --git a/crates/rspack/tests/snapshots/defaults__default_options.snap b/crates/rspack/tests/snapshots/defaults__default_options.snap index 64b979b9f24a..bbc4c7a8184a 100644 --- a/crates/rspack/tests/snapshots/defaults__default_options.snap +++ b/crates/rspack/tests/snapshots/defaults__default_options.snap @@ -1,6 +1,5 @@ --- source: crates/rspack/tests/defaults.rs -assertion_line: 16 expression: options --- CompilerOptions { @@ -1404,6 +1403,13 @@ CompilerOptions { ), wrapped_context_reg_exp: Some( RspackRegex { + regex: Native( + RspackNativeRegex( + Regex( + ".*", + ), + ), + ), flags: "", source: ".*", }, diff --git a/crates/rspack_binding_api/src/raw_options/raw_builtins/raw_lazy_compilation.rs b/crates/rspack_binding_api/src/raw_options/raw_builtins/raw_lazy_compilation.rs index 
e49b931f6aad..c74601f4a637 100644 --- a/crates/rspack_binding_api/src/raw_options/raw_builtins/raw_lazy_compilation.rs +++ b/crates/rspack_binding_api/src/raw_options/raw_builtins/raw_lazy_compilation.rs @@ -69,7 +69,7 @@ impl From for LazyCompilationTest fn from(value: RawLazyCompilationTest) -> Self { match value.0 { Either::A(regex) => Self::Regex( - RspackRegex::with_flags(&regex.source, &regex.flags).unwrap_or_else(|_| { + RspackRegex::with_flags(regex.source(), regex.flags()).unwrap_or_else(|_| { let msg = format!("[lazyCompilation]incorrect regex {regex:?}"); panic!("{msg}"); }), diff --git a/crates/rspack_regex/Cargo.toml b/crates/rspack_regex/Cargo.toml index 2a15993b36ad..5c335ccdbc3c 100644 --- a/crates/rspack_regex/Cargo.toml +++ b/crates/rspack_regex/Cargo.toml @@ -11,7 +11,7 @@ version.workspace = true [dependencies] cow-utils = { workspace = true } napi = { workspace = true } -regex-syntax = { workspace = true } +regex = { workspace = true } regress = { workspace = true, features = ["backend-pikevm", "std"] } rspack_cacheable = { workspace = true } rspack_error = { workspace = true } diff --git a/crates/rspack_regex/src/algo.rs b/crates/rspack_regex/src/algo.rs deleted file mode 100644 index 3df4d3641bda..000000000000 --- a/crates/rspack_regex/src/algo.rs +++ /dev/null @@ -1,170 +0,0 @@ -use std::{fmt::Debug, hash::Hash}; - -use regex_syntax::hir::{Hir, HirKind, Look, literal::ExtractKind}; -use regress::Match; -use rspack_error::{Error, error}; - -#[derive(Clone)] -pub struct HashRegressRegex { - pub regex: regress::Regex, - expr: String, - flags: String, -} - -impl Hash for HashRegressRegex { - fn hash(&self, state: &mut H) { - self.expr.hash(state); - self.flags.hash(state) - } -} - -impl Debug for HashRegressRegex { - fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result { - Debug::fmt(&self.regex, f) - } -} - -impl HashRegressRegex { - pub(crate) fn new(expr: &str, flags: &str) -> Result { - match regress::Regex::with_flags(expr, flags) { 
- Ok(regex) => Ok(Self { - regex, - expr: expr.to_string(), - flags: flags.to_string(), - }), - Err(err) => Err(error!( - "Can't construct regex `/{expr}/{flags}`, original error message: {err}" - )), - } - } - - fn find(&self, text: &str) -> Option { - self.regex.find(text) - } -} - -#[derive(Clone, Debug, Hash)] -pub enum Algo { - /// Regress is considered having the same behaviors as RegExp in JS. - /// But Regress has poor performance. To improve performance of regex matching, - /// we would try to use some fast algo to do matching, when we detect some special pattern. - /// See details at https://github.com/web-infra-dev/rspack/pull/3113 - EndWith { - pats: Vec, - }, - Regress(HashRegressRegex), -} - -impl Algo { - pub(crate) fn new(expr: &str, flags: &str) -> Result { - let ignore_case = flags.contains('i') || flags.contains('g') || flags.contains('y'); - if let Some(algo) = Self::try_compile_to_end_with_fast_path(expr) - && !ignore_case - { - Ok(algo) - } else { - match HashRegressRegex::new(expr, flags) { - Ok(regex) => Ok(Algo::Regress(regex)), - Err(e) => Err(e), - } - } - } - - fn try_compile_to_end_with_fast_path(expr: &str) -> Option { - let hir = regex_syntax::parse(expr).ok()?; - let seq = regex_syntax::hir::literal::Extractor::new() - .kind(ExtractKind::Suffix) - .extract(&hir); - if is_ends_with_regex(&hir) && seq.is_exact() { - let pats = seq - .literals()? - .iter() - .map(|item| String::from_utf8_lossy(item.as_bytes()).to_string()) - .collect::>(); - - Some(Algo::EndWith { pats }) - } else { - None - } - } - - pub(crate) fn test(&self, str: &str) -> bool { - match self { - Algo::Regress(regex) => regex.find(str).is_some(), - Algo::EndWith { pats } => pats.iter().any(|pat| str.ends_with(pat)), - } - } - - pub(crate) fn global(&self) -> bool { - match self { - Algo::Regress(reg) => reg.flags.contains('g'), - Algo::EndWith { .. 
} => false, - } - } - - pub(crate) fn sticky(&self) -> bool { - match self { - Algo::Regress(reg) => reg.flags.contains('y'), - Algo::EndWith { .. } => false, - } - } -} - -fn is_ends_with_regex(hir: &Hir) -> bool { - if let HirKind::Concat(list) = hir.kind() { - list[0].kind() != &HirKind::Look(Look::Start) - && list[list.len() - 1].kind() == &HirKind::Look(Look::End) - } else { - false - } -} - -#[cfg(test)] -mod test_algo { - use super::*; - - impl Algo { - fn end_with_pats(&self) -> std::collections::HashSet<&str> { - match self { - Algo::EndWith { pats } => pats.iter().map(|s| s.as_str()).collect(), - Algo::Regress(_) => panic!("expect EndWith"), - } - } - - fn is_end_with(&self) -> bool { - matches!(self, Self::EndWith { .. }) - } - - fn is_regress(&self) -> bool { - matches!(self, Self::Regress(..)) - } - } - - #[test] - fn should_use_end_with_algo_with_i_flag() { - assert!(Algo::new("\\.js$", "").unwrap().is_end_with()); - assert!(!Algo::new("\\.js$", "i").unwrap().is_end_with()); - } - - #[test] - fn correct_end_with() { - use std::collections::HashSet; - let algo = Algo::new("\\.js$", "").unwrap(); - assert_eq!(algo.end_with_pats(), HashSet::from([".js"])); - let algo = Algo::new("\\.(jsx?|tsx?)$", "").unwrap(); - assert_eq!( - algo.end_with_pats(), - HashSet::from([".jsx", ".tsx", ".js", ".ts"]) - ); - let algo = Algo::new("\\.(svg|png)$", "").unwrap(); - assert_eq!(algo.end_with_pats(), HashSet::from([".svg", ".png"])); - } - - #[test] - fn check_slow_path() { - // this is a full match - assert!(Algo::new("^\\.(svg|png)$", "").unwrap().is_regress()); - // wildcard match - assert!(Algo::new("\\..(svg|png)$", "").unwrap().is_regress()); - } -} diff --git a/crates/rspack_regex/src/lib.rs b/crates/rspack_regex/src/lib.rs index 411ee6b481cb..e0827aefd052 100644 --- a/crates/rspack_regex/src/lib.rs +++ b/crates/rspack_regex/src/lib.rs @@ -1,9 +1,12 @@ -mod algo; mod napi; +mod native; +mod regress; use std::fmt::Debug; use cow_utils::CowUtils; +use 
native::RspackNativeRegex; +use regress::RspackRegressRegex; use rspack_cacheable::{ cacheable, with::{AsString, AsStringConverter}, @@ -11,20 +14,32 @@ use rspack_cacheable::{ use rspack_error::Error; use swc_core::ecma::ast::Regex as SwcRegex; -use self::algo::Algo; +#[derive(Debug, Clone)] +pub enum RspackRegexImpl { + Native(RspackNativeRegex), + Regress(RspackRegressRegex), +} + +impl RspackRegexImpl { + pub fn test(&self, text: &str) -> bool { + match self { + Self::Native(regex) => regex.test(text), + Self::Regress(regex) => regex.test(text), + } + } +} -/// Using wrapper type required by [TryFrom] trait #[cacheable(with=AsString)] -#[derive(Clone)] +#[derive(Debug, Clone)] pub struct RspackRegex { - algo: Box, + pub regex: RspackRegexImpl, pub flags: String, pub source: String, } impl PartialEq for RspackRegex { fn eq(&self, other: &Self) -> bool { - self.flags == other.flags && self.source == other.source + self.flags == other.flags && self.source == other.source && self.r#type() == other.r#type() } } @@ -34,32 +49,32 @@ impl std::hash::Hash for RspackRegex { fn hash(&self, state: &mut H) { self.flags.hash(state); self.source.hash(state); + self.r#type().hash(state); } } -impl Debug for RspackRegex { - fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result { - f.debug_struct("RspackRegex") - .field("flags", &self.flags) - .field("source", &self.source) - .finish() +impl RspackRegex { + #[inline] + pub fn r#type(&self) -> String { + match self.regex { + RspackRegexImpl::Native(_) => "native".to_string(), + RspackRegexImpl::Regress(_) => "regress".to_string(), + } } -} -impl RspackRegex { #[inline] pub fn test(&self, text: &str) -> bool { - self.algo.test(text) + self.regex.test(text) } #[inline] pub fn global(&self) -> bool { - self.algo.global() + self.flags.contains('g') } #[inline] pub fn sticky(&self) -> bool { - self.algo.sticky() + self.flags.contains('y') } #[inline] @@ -78,13 +93,21 @@ impl RspackRegex { } pub fn with_flags(expr: &str, 
flags: &str) -> Result { - let mut chars = flags.chars().collect::>(); - chars.sort_unstable(); - Ok(Self { - flags: chars.into_iter().collect::(), - source: expr.to_string(), - algo: Box::new(Algo::new(expr, flags)?), - }) + match RspackNativeRegex::with_flags(expr, flags) { + Ok(regex) => Ok(Self { + regex: RspackRegexImpl::Native(regex), + flags: flags.to_string(), + source: expr.to_string(), + }), + Err(_) => { + let regress = RspackRegressRegex::with_flags(expr, flags)?; + Ok(Self { + regex: RspackRegexImpl::Regress(regress), + flags: flags.to_string(), + source: expr.to_string(), + }) + } + } } // https://github.com/webpack/webpack/blob/4baf1c075d59babd028f8201526cb8c4acfd24a0/lib/dependencies/ContextDependency.js#L30 @@ -96,14 +119,16 @@ impl RspackRegex { // https://github.com/webpack/webpack/blob/4baf1c075d59babd028f8201526cb8c4acfd24a0/lib/ContextModule.js#L192 #[inline] pub fn to_pretty_string(&self, strip_slash: bool) -> String { - if strip_slash { + let res = if strip_slash { format!("{}{}", self.source, self.flags) } else { self.to_source_string() - } - .cow_replace('!', "%21") - .cow_replace('|', "%7C") - .into_owned() + }; + + res + .cow_replace('!', "%21") + .cow_replace('|', "%7C") + .into_owned() } } diff --git a/crates/rspack_regex/src/napi.rs b/crates/rspack_regex/src/napi.rs index 1a8e61c77ad1..bc68702c34b6 100644 --- a/crates/rspack_regex/src/napi.rs +++ b/crates/rspack_regex/src/napi.rs @@ -60,8 +60,8 @@ impl ToNapiValue for RspackRegex { let global = env.get_global()?; let regex = global.get_named_property::>("RegExp")?; - let flags = env.create_string(&val.flags)?; - let source = env.create_string(&val.source)?; + let flags = env.create_string(val.flags())?; + let source = env.create_string(val.source())?; Ok(regex.new_instance((source, flags))?.raw()) } diff --git a/crates/rspack_regex/src/native.rs b/crates/rspack_regex/src/native.rs new file mode 100644 index 000000000000..3e6564a3436b --- /dev/null +++ 
b/crates/rspack_regex/src/native.rs
@@ -0,0 +1,69 @@
+use std::fmt::Debug;
+
+use cow_utils::CowUtils;
+use regex::Regex;
+use rspack_error::{Error, error};
+
+/// A regular expression compiled by the Rust `regex` crate.
+#[derive(Debug, Clone)]
+pub struct RspackNativeRegex(pub Regex);
+
+impl RspackNativeRegex {
+  /// Returns `true` when `Regex::is_match` reports a match for `text`.
+  pub fn test(&self, text: &str) -> bool {
+    self.0.is_match(text)
+  }
+
+  /// Compiles `expr` using the JavaScript-style flag string `raw_flags`.
+  ///
+  /// The `i`, `m`, `s` and `u` flags are forwarded to the `regex` crate as an
+  /// inline `(?...)` prefix; `d`, `g` and `y` are accepted but leave the
+  /// compiled regex unchanged.
+  ///
+  /// # Errors
+  ///
+  /// Returns an error for any other flag, or when the `regex` crate rejects
+  /// the resulting pattern.
+  pub fn with_flags(expr: &str, raw_flags: &str) -> Result<Self, Error> {
+    // NOTE(review): this collapses every doubled backslash in `expr` into a
+    // single one before compiling. Confirm callers really pass double-escaped
+    // sources; otherwise a class such as `[\\/]` loses its literal backslash.
+    let pattern = expr.cow_replace("\\\\", "\\");
+
+    let mut flags = raw_flags.chars().collect::<Vec<char>>();
+    flags.sort_unstable();
+    let mut applied_flags = String::new();
+    // https://github.com/vercel/next.js/blob/203adbd5d054609812d1f3666184875dcca13f3a/turbopack/crates/turbo-esregex/src/lib.rs#L71-L94
+    for flag in &flags {
+      match flag {
+        // indices for substring matches: not relevant for the regex itself
+        'd' => {}
+        // global: default in rust, ignore
+        'g' => {}
+        // case-insensitive: letters match both upper and lower case
+        'i' => applied_flags.push('i'),
+        // multi-line mode: ^ and $ match begin/end of line
+        'm' => applied_flags.push('m'),
+        // allow . to match \n
+        's' => applied_flags.push('s'),
+        // Unicode support (enabled by default)
+        'u' => applied_flags.push('u'),
+        // sticky search: not relevant for the regex itself
+        'y' => {}
+        _ => {
+          return Err(error!(
+            "unsupported flag `{flag}` in regex: `{pattern}` with flags: `{raw_flags}`"
+          ));
+        }
+      }
+    }
+
+    let regex = if applied_flags.is_empty() {
+      Regex::new(&pattern).map_err(|e| error!(e))?
+    } else {
+      Regex::new(format!("(?{applied_flags}){pattern}").as_str()).map_err(|e| error!(e))?
+    };
+
+    Ok(Self(regex))
+  }
+}
diff --git a/crates/rspack_regex/src/regress.rs b/crates/rspack_regex/src/regress.rs
new file mode 100644
index 000000000000..fe6ffcc268e0
--- /dev/null
+++ b/crates/rspack_regex/src/regress.rs
@@ -0,0 +1,27 @@
+use regress::Regex;
+use rspack_error::{Error, error};
+
+/// A regular expression compiled by the `regress` crate.
+#[derive(Debug, Clone)]
+pub struct RspackRegressRegex(pub Regex);
+
+impl RspackRegressRegex {
+  /// Returns `true` when `Regex::find` yields a match for `text`.
+  pub fn test(&self, text: &str) -> bool {
+    self.0.find(text).is_some()
+  }
+
+  /// Compiles `source` with the given `flags` string.
+  ///
+  /// # Errors
+  ///
+  /// Returns an error that embeds the original `regress` message when the
+  /// pattern or flags cannot be compiled.
+  pub fn with_flags(source: &str, flags: &str) -> Result<Self, Error> {
+    Regex::with_flags(source, flags)
+      .map(Self)
+      .map_err(|err| {
+        error!("Can't construct regex `/{source}/{flags}`, original error message: {err}")
+      })
+  }
+}