Skip to content

Commit 76c0890

Browse files
committed
perf: create native regex first
1 parent a618407 commit 76c0890

File tree

5 files changed

+177
-54
lines changed

5 files changed

+177
-54
lines changed

Cargo.lock

Lines changed: 1 addition & 0 deletions
Some generated files are not rendered by default. Learn more about customizing how changed files appear on GitHub.

crates/rspack_binding_api/src/raw_options/raw_builtins/raw_lazy_compilation.rs

Lines changed: 1 addition & 1 deletion
Original file line number | Diff line number | Diff line change
@@ -69,7 +69,7 @@ impl From<RawLazyCompilationTest> for LazyCompilationTest<LazyCompilationTestFn>
6969
fn from(value: RawLazyCompilationTest) -> Self {
7070
match value.0 {
7171
Either::A(regex) => Self::Regex(
72-
RspackRegex::with_flags(&regex.source, &regex.flags).unwrap_or_else(|_| {
72+
RspackRegex::with_flags(regex.source(), regex.flags()).unwrap_or_else(|_| {
7373
let msg = format!("[lazyCompilation]incorrect regex {regex:?}");
7474
panic!("{msg}");
7575
}),

crates/rspack_regex/Cargo.toml

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -11,6 +11,7 @@ version.workspace = true
1111
[dependencies]
1212
cow-utils = { workspace = true }
1313
napi = { workspace = true }
14+
regex = { workspace = true }
1415
regex-syntax = { workspace = true }
1516
regress = { workspace = true, features = ["backend-pikevm", "std"] }
1617
rspack_cacheable = { workspace = true }

crates/rspack_regex/src/lib.rs

Lines changed: 172 additions & 51 deletions
Original file line numberDiff line numberDiff line change
@@ -8,102 +8,220 @@ use rspack_cacheable::{
88
cacheable,
99
with::{AsString, AsStringConverter},
1010
};
11-
use rspack_error::Error;
11+
use rspack_error::{Error, error};
1212
use swc_core::ecma::ast::Regex as SwcRegex;
1313

1414
use self::algo::Algo;
1515

16-
/// Using wrapper type required by [TryFrom] trait
1716
#[cacheable(with=AsString)]
18-
#[derive(Clone)]
19-
pub struct RspackRegex {
20-
algo: Box<Algo>,
21-
pub flags: String,
22-
pub source: String,
23-
}
24-
25-
impl PartialEq for RspackRegex {
26-
fn eq(&self, other: &Self) -> bool {
27-
self.flags == other.flags && self.source == other.source
28-
}
29-
}
30-
31-
impl Eq for RspackRegex {}
32-
33-
impl std::hash::Hash for RspackRegex {
34-
fn hash<H: std::hash::Hasher>(&self, state: &mut H) {
35-
self.flags.hash(state);
36-
self.source.hash(state);
37-
}
38-
}
39-
40-
impl Debug for RspackRegex {
41-
fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result {
42-
f.debug_struct("RspackRegex")
43-
.field("flags", &self.flags)
44-
.field("source", &self.source)
45-
.finish()
46-
}
17+
#[derive(Clone, PartialEq, Eq, Hash, Debug)]
18+
pub enum RspackRegex {
19+
Regex(RspackNativeRegex),
20+
Regress(RspackRegressRegex),
4721
}
4822

4923
impl RspackRegex {
5024
#[inline]
5125
pub fn test(&self, text: &str) -> bool {
52-
self.algo.test(text)
26+
match self {
27+
Self::Regex(regex) => regex.regex.is_match(text),
28+
Self::Regress(regress) => regress.algo.test(text),
29+
}
5330
}
5431

5532
#[inline]
5633
pub fn global(&self) -> bool {
57-
self.algo.global()
34+
match self {
35+
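// report `g` from the stored flags; the native engine ignores `g` when compiling the pattern
36+
// NOTE(review): the stated intent was to return false for native regex so context options do not emit a warning (safe because the native regex cannot be used to capture multiple matches), but the code returns true when `g` is present — confirm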
37+
Self::Regex(regex) => regex.flags.contains('g'),
38+
Self::Regress(regress) => regress.algo.global(),
39+
}
5840
}
5941

6042
#[inline]
6143
pub fn sticky(&self) -> bool {
62-
self.algo.sticky()
44+
match self {
45+
Self::Regex(regex) => regex.flags.contains('y'),
46+
Self::Regress(regress) => regress.algo.sticky(),
47+
}
6348
}
6449

6550
#[inline]
6651
pub fn source(&self) -> &str {
67-
&self.source
52+
match self {
53+
Self::Regex(regex) => &regex.source,
54+
Self::Regress(regress) => &regress.source,
55+
}
6856
}
6957

7058
#[inline]
7159
pub fn flags(&self) -> &str {
72-
&self.flags
60+
match self {
61+
Self::Regex(regex) => &regex.flags,
62+
Self::Regress(regress) => &regress.flags,
63+
}
7364
}
7465

7566
#[inline]
7667
pub fn new(expr: &str) -> Result<Self, Error> {
77-
Self::with_flags(expr, "")
68+
match RspackNativeRegex::with_flags(expr, "") {
69+
Ok(regex) => Ok(Self::Regex(regex)),
70+
Err(e) => {
71+
println!("create native regex failed: {expr} {e}");
72+
let regress = RspackRegressRegex::with_flags(expr, "")?;
73+
Ok(Self::Regress(regress))
74+
}
75+
}
7876
}
7977

8078
pub fn with_flags(expr: &str, flags: &str) -> Result<Self, Error> {
81-
let mut chars = flags.chars().collect::<Vec<char>>();
82-
chars.sort_unstable();
83-
Ok(Self {
84-
flags: chars.into_iter().collect::<String>(),
85-
source: expr.to_string(),
86-
algo: Box::new(Algo::new(expr, flags)?),
87-
})
79+
match RspackNativeRegex::with_flags(expr, flags) {
80+
Ok(regex) => Ok(Self::Regex(regex)),
81+
Err(e) => {
82+
println!("create native regex failed: {expr} with {flags} {e}");
83+
let regress = RspackRegressRegex::with_flags(expr, flags)?;
84+
Ok(Self::Regress(regress))
85+
}
86+
}
8887
}
8988

9089
// https://github.com/webpack/webpack/blob/4baf1c075d59babd028f8201526cb8c4acfd24a0/lib/dependencies/ContextDependency.js#L30
9190
#[inline]
9291
pub fn to_source_string(&self) -> String {
93-
format!("/{}/{}", self.source, self.flags)
92+
match self {
93+
Self::Regex(regex) => format!("/{}/{}", regex.source, regex.flags),
94+
Self::Regress(regress) => format!("/{}/{}", regress.source, regress.flags),
95+
}
9496
}
9597

9698
// https://github.com/webpack/webpack/blob/4baf1c075d59babd028f8201526cb8c4acfd24a0/lib/ContextModule.js#L192
9799
#[inline]
98100
pub fn to_pretty_string(&self, strip_slash: bool) -> String {
99-
if strip_slash {
100-
format!("{}{}", self.source, self.flags)
101+
let res = if strip_slash {
102+
match self {
103+
Self::Regex(regex) => format!("{}{}", regex.source, regex.flags),
104+
Self::Regress(regress) => format!("{}{}", regress.source, regress.flags),
105+
}
101106
} else {
102107
self.to_source_string()
108+
};
109+
110+
res
111+
.cow_replace('!', "%21")
112+
.cow_replace('|', "%7C")
113+
.into_owned()
114+
}
115+
}
116+
117+
#[derive(Clone, Debug)]
118+
pub struct RspackNativeRegex {
119+
regex: regex::Regex,
120+
flags: String,
121+
source: String,
122+
}
123+
124+
impl std::hash::Hash for RspackNativeRegex {
125+
fn hash<H: std::hash::Hasher>(&self, state: &mut H) {
126+
self.flags.hash(state);
127+
self.source.hash(state);
128+
}
129+
}
130+
131+
impl PartialEq for RspackNativeRegex {
132+
fn eq(&self, other: &Self) -> bool {
133+
self.flags == other.flags && self.source == other.source
134+
}
135+
}
136+
137+
impl Eq for RspackNativeRegex {}
138+
139+
impl RspackNativeRegex {
140+
pub fn with_flags(expr: &str, raw_flags: &str) -> Result<Self, Error> {
141+
let pattern = expr.replace("\\/", "/");
142+
143+
let mut flags = raw_flags.chars().collect::<Vec<char>>();
144+
flags.sort_unstable();
145+
let mut applied_flags = String::new();
146+
// https://github.com/vercel/next.js/blob/203adbd5d054609812d1f3666184875dcca13f3a/turbopack/crates/turbo-esregex/src/lib.rs#L71-L94
147+
for flag in &flags {
148+
match flag {
149+
// indices for substring matches: not relevant for the regex itself
150+
'd' => {}
151+
// global: default in rust, ignore
152+
'g' => {}
153+
// case-insensitive: letters match both upper and lower case
154+
'i' => applied_flags.push('i'),
155+
// multi-line mode: ^ and $ match begin/end of line
156+
'm' => applied_flags.push('m'),
157+
// allow . to match \n
158+
's' => applied_flags.push('s'),
159+
// Unicode support (enabled by default)
160+
'u' => applied_flags.push('u'),
161+
// sticky search: not relevant for the regex itself
162+
'y' => {}
163+
_ => {
164+
return Err(error!(
165+
"unsupported flag `{flag}` in regex: `{pattern}` with flags: `{raw_flags}`"
166+
));
167+
}
168+
}
103169
}
104-
.cow_replace('!', "%21")
105-
.cow_replace('|', "%7C")
106-
.into_owned()
170+
171+
let regex = if applied_flags.is_empty() {
172+
regex::Regex::new(&pattern).map_err(|e| error!(e))?
173+
} else {
174+
regex::Regex::new(&format!("(?{applied_flags}){pattern}")).map_err(|e| error!(e))?
175+
};
176+
177+
Ok(Self {
178+
regex,
179+
flags: flags.into_iter().collect::<String>(),
180+
source: expr.to_string(),
181+
})
182+
}
183+
}
184+
185+
#[derive(Clone)]
186+
pub struct RspackRegressRegex {
187+
algo: Box<Algo>,
188+
pub flags: String,
189+
pub source: String,
190+
}
191+
192+
impl PartialEq for RspackRegressRegex {
193+
fn eq(&self, other: &Self) -> bool {
194+
self.flags == other.flags && self.source == other.source
195+
}
196+
}
197+
198+
impl Eq for RspackRegressRegex {}
199+
200+
impl std::hash::Hash for RspackRegressRegex {
201+
fn hash<H: std::hash::Hasher>(&self, state: &mut H) {
202+
self.flags.hash(state);
203+
self.source.hash(state);
204+
}
205+
}
206+
207+
impl Debug for RspackRegressRegex {
208+
fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result {
209+
f.debug_struct("RspackRegressRegex")
210+
.field("flags", &self.flags)
211+
.field("source", &self.source)
212+
.finish()
213+
}
214+
}
215+
216+
impl RspackRegressRegex {
217+
pub fn with_flags(expr: &str, flags: &str) -> Result<Self, Error> {
218+
let mut chars = flags.chars().collect::<Vec<char>>();
219+
chars.sort_unstable();
220+
Ok(Self {
221+
flags: chars.into_iter().collect::<String>(),
222+
source: expr.to_string(),
223+
algo: Box::new(Algo::new(expr, flags)?),
224+
})
107225
}
108226
}
109227

@@ -125,7 +243,10 @@ impl TryFrom<SwcRegex> for RspackRegex {
125243

126244
impl AsStringConverter for RspackRegex {
127245
fn to_string(&self) -> Result<String, rspack_cacheable::SerializeError> {
128-
Ok(format!("{}#{}", self.flags, self.source))
246+
match self {
247+
Self::Regex(regex) => Ok(format!("{}#{}", regex.flags, regex.source)),
248+
Self::Regress(regress) => Ok(format!("{}#{}", regress.flags, regress.source)),
249+
}
129250
}
130251
fn from_str(s: &str) -> Result<Self, rspack_cacheable::DeserializeError>
131252
where

crates/rspack_regex/src/napi.rs

Lines changed: 2 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -60,8 +60,8 @@ impl ToNapiValue for RspackRegex {
6060
let global = env.get_global()?;
6161
let regex = global.get_named_property::<Function<'_, _>>("RegExp")?;
6262

63-
let flags = env.create_string(&val.flags)?;
64-
let source = env.create_string(&val.source)?;
63+
let flags = env.create_string(val.flags())?;
64+
let source = env.create_string(val.source())?;
6565

6666
Ok(regex.new_instance((source, flags))?.raw())
6767
}

0 commit comments

Comments
 (0)