Skip to content

Commit b3c7436

Browse files
authored
perf: Replace hot regex with parser (#11341)
1 parent 1dd544f commit b3c7436

File tree

17 files changed

+451
-204
lines changed

17 files changed

+451
-204
lines changed

Cargo.lock

Lines changed: 23 additions & 13 deletions
Some generated files are not rendered by default. Learn more about customizing how changed files appear on GitHub.

Cargo.toml

Lines changed: 2 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -112,6 +112,8 @@ urlencoding = { version = "2.1.3", default-features = false }
112112
ustr = { package = "ustr-fxhash", version = "1.0.1", default-features = false }
113113
wasmparser = { version = "0.222.0", default-features = false }
114114
xxhash-rust = { version = "0.8.14", default-features = false }
115+
winnow = { version = "0.7.12", default-features = false, features = ["std", "simd"] }
116+
memchr = { version = "2.7.5", default-features = false }
115117

116118
# Pinned
117119
napi = { version = "3.1.6", default-features = false }

crates/rspack_core/Cargo.toml

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -79,6 +79,7 @@ swc_node_comments = { workspace = true }
7979
tokio = { workspace = true, features = ["rt", "macros"] }
8080
tracing = { workspace = true }
8181
ustr = { workspace = true }
82+
winnow = { workspace = true }
8283

8384
[dev-dependencies]
8485
pretty_assertions = { workspace = true }

crates/rspack_core/src/concatenated_module.rs

Lines changed: 2 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -2753,7 +2753,7 @@ pub fn split_readable_identifier(extra_info: &str) -> Vec<String> {
27532753
let extra_info = REGEX.replace_all(extra_info, "");
27542754
let mut splitted_info: Vec<String> = extra_info
27552755
.split('/')
2756-
.map(|s| escape_identifier(s).to_string())
2756+
.map(|s| escape_identifier(s).into_owned())
27572757
.collect();
27582758
splitted_info.reverse();
27592759
splitted_info
@@ -2767,5 +2767,5 @@ pub fn escape_name(name: &str) -> String {
27672767
return "namespaceObject".to_string();
27682768
}
27692769

2770-
escape_identifier(name).to_string()
2770+
escape_identifier(name).into_owned()
27712771
}

crates/rspack_core/src/normal_module_factory.rs

Lines changed: 60 additions & 36 deletions
Original file line numberDiff line numberDiff line change
@@ -1,9 +1,5 @@
1-
use std::{
2-
borrow::Cow,
3-
sync::{Arc, LazyLock},
4-
};
1+
use std::{borrow::Cow, sync::Arc};
52

6-
use regex::Regex;
73
use rspack_cacheable::cacheable;
84
use rspack_error::{Result, error};
95
use rspack_hook::define_hook;
@@ -12,6 +8,7 @@ use rspack_paths::Utf8PathBuf;
128
use rspack_util::MergeFrom;
139
use sugar_path::SugarPath;
1410
use swc_core::common::Span;
11+
use winnow::prelude::*;
1512

1613
use crate::{
1714
AssetInlineGeneratorOptions, AssetResourceGeneratorOptions, BoxLoader, BoxModule,
@@ -79,16 +76,6 @@ impl ModuleFactory for NormalModuleFactory {
7976
}
8077
}
8178

82-
static MATCH_RESOURCE_REGEX: LazyLock<Regex> =
83-
LazyLock::new(|| Regex::new("^([^!]+)!=!").expect("Failed to initialize `MATCH_RESOURCE_REGEX`"));
84-
85-
static MATCH_WEBPACK_EXT_REGEX: LazyLock<Regex> = LazyLock::new(|| {
86-
Regex::new(r#"\.webpack\[([^\]]+)\]$"#).expect("Failed to initialize `MATCH_WEBPACK_EXT_REGEX`")
87-
});
88-
89-
static ELEMENT_SPLIT_REGEX: LazyLock<Regex> =
90-
LazyLock::new(|| Regex::new(r"!+").expect("Failed to initialize `ELEMENT_SPLIT_REGEX`"));
91-
9279
const HYPHEN: char = '-';
9380
const EXCLAMATION: char = '!';
9481
const DOT: char = '.';
@@ -171,9 +158,8 @@ impl NormalModuleFactory {
171158
if scheme.is_none() {
172159
let mut request_without_match_resource = data.request.as_str();
173160
request_without_match_resource = {
174-
if let Some(m) = MATCH_RESOURCE_REGEX.captures(request_without_match_resource) {
161+
if let Ok((resource, full_matched)) = match_resource(request_without_match_resource) {
175162
let match_resource = {
176-
let resource = m.get(1).expect("Should have match resource").as_str();
177163
let mut chars = resource.chars();
178164
let first_char = chars.next();
179165
let second_char = chars.next();
@@ -209,10 +195,7 @@ impl NormalModuleFactory {
209195
);
210196

211197
// e.g. ./index.js!=!
212-
let whole_matched = m
213-
.get(0)
214-
.expect("should guaranteed to return a non-None value.")
215-
.as_str();
198+
let whole_matched = full_matched;
216199

217200
match request_without_match_resource
218201
.char_indices()
@@ -259,7 +242,7 @@ impl NormalModuleFactory {
259242
Some((pos, _)) => &request_without_match_resource[pos..],
260243
None => request_without_match_resource,
261244
};
262-
ELEMENT_SPLIT_REGEX.split(s).collect::<Vec<_>>()
245+
split_element(s)
263246
};
264247

265248
unresolved_resource = raw_elements
@@ -371,15 +354,10 @@ impl NormalModuleFactory {
371354
};
372355

373356
let resolved_module_rules = if let Some(match_resource_data) = &mut match_resource_data
374-
&& let Some(captures) = MATCH_WEBPACK_EXT_REGEX.captures(&match_resource_data.resource)
375-
&& let Some(module_type) = captures.get(1)
357+
&& let Ok((module, module_type)) = match_webpack_ext(&match_resource_data.resource)
376358
{
377-
match_module_type = Some(module_type.as_str().into());
378-
match_resource_data.resource = match_resource_data
379-
.resource
380-
.strip_suffix(&format!(".webpack[{}]", module_type.as_str()))
381-
.expect("should success")
382-
.to_owned();
359+
match_module_type = Some(module_type.into());
360+
match_resource_data.resource = module.into();
383361

384362
vec![]
385363
} else {
@@ -943,12 +921,58 @@ pub struct NormalModuleCreateData {
943921
pub context: Option<String>,
944922
}
945923

924+
/// Splits `input` into the elements found between runs of `!`.
///
/// One or more consecutive `!` characters act as a single separator, so
/// `"a!!a"` yields `["a", "a"]`. Leading or trailing separators produce an
/// empty element at the corresponding end (`"!!a!!"` yields `["", "a", ""]`).
/// The returned slices borrow from `input`.
///
/// # Panics
///
/// Panics with "split should never fail" if the underlying parser reports an
/// error.
fn split_element(mut input: &str) -> Vec<&str> {
925+
use winnow::{
926+
combinator::separated,
927+
error::ContextError,
928+
token::{take_till, take_while},
929+
};
930+
931+
// Element parser: any number of characters (including zero) up to the next `!`.
// Separator parser: at least one `!`.
separated::<_, _, _, _, ContextError, _, _>(.., take_till(.., '!'), take_while(1.., '!'))
932+
.parse_next(&mut input)
933+
// The element parser accepts an empty match, which is why a failure here is
// treated as a bug rather than a recoverable error.
.expect("split should never fail")
934+
}
935+
936+
/// Parses a leading `<resource>!=!` prefix from `input`.
///
/// On success returns `(resource, whole_matched)`:
/// - `resource` is the non-empty text before the first `!`;
/// - `whole_matched` is everything that was consumed, i.e. `resource` plus the
///   `!=!` marker (e.g. `./index.js!=!`).
///
/// # Errors
///
/// Returns an error when `input` does not start with at least one non-`!`
/// character immediately followed by `!=!`.
fn match_resource(mut input: &str) -> winnow::ModalResult<(&str, &str)> {
937+
use winnow::{combinator::terminated, token::take_until};
938+
939+
// Keep the untouched input so the consumed prefix can be sliced out afterwards.
let backup = input;
940+
941+
let res = terminated(take_until(1.., '!'), "!=!").parse_next(&mut input)?;
942+
// `input` now holds only the unparsed remainder; the length difference is the
// byte length of the matched prefix.
let whole_matched = &backup[..backup.len() - input.len()];
943+
Ok((res, whole_matched))
944+
}
945+
946+
fn match_webpack_ext(mut input: &str) -> winnow::ModalResult<(&str, &str)> {
947+
use winnow::{
948+
combinator::{delimited, eof, preceded, terminated},
949+
token::take_until,
950+
};
951+
952+
let parser = (
953+
take_until(0.., ".webpack"),
954+
preceded(".webpack", delimited('[', take_until(1.., ']'), ']')),
955+
);
956+
957+
terminated(parser, eof).parse_next(&mut input)
958+
}
959+
946960
#[test]
947-
fn match_webpack_ext() {
948-
assert!(MATCH_WEBPACK_EXT_REGEX.is_match("foo.webpack[type/javascript]"));
949-
let cap = MATCH_WEBPACK_EXT_REGEX
950-
.captures("foo.webpack[type/javascript]")
951-
.unwrap();
961+
// Checks that a run of `!` counts as one separator and that leading/trailing
// separators produce empty elements at the ends.
fn test_split_element() {
962+
assert_eq!(split_element("a!a"), vec!["a", "a"]);
963+
assert_eq!(split_element("a!!a"), vec!["a", "a"]);
964+
assert_eq!(split_element("!!a!!a!!"), vec!["", "a", "a", ""]);
965+
}
966+
967+
#[test]
968+
fn test_match_webpack_ext() {
969+
assert!(match_webpack_ext("foo.webpack[type/javascript]").is_ok());
970+
let cap = match_webpack_ext("foo.webpack[type/javascript]").unwrap();
971+
972+
assert_eq!(cap, ("foo", "type/javascript"));
952973

953-
assert_eq!(cap.get(1).unwrap().as_str(), "type/javascript");
974+
assert_eq!(
975+
match_webpack_ext("foo.css.webpack[javascript/auto]"),
976+
Ok(("foo.css", "javascript/auto"))
977+
);
954978
}

crates/rspack_core/src/options/filename.rs

Lines changed: 16 additions & 13 deletions
Original file line numberDiff line numberDiff line change
@@ -4,10 +4,9 @@ use std::{
44
hash::{Hash, Hasher},
55
ops::Deref,
66
ptr,
7-
sync::{Arc, LazyLock},
7+
sync::Arc,
88
};
99

10-
use regex::Regex;
1110
use rspack_cacheable::{
1211
cacheable,
1312
with::{AsPreset, Unsupported},
@@ -33,9 +32,6 @@ pub static FULL_HASH_PLACEHOLDER: &str = "[fullhash]";
3332
pub static CHUNK_HASH_PLACEHOLDER: &str = "[chunkhash]";
3433
pub static CONTENT_HASH_PLACEHOLDER: &str = "[contenthash]";
3534

36-
static DATA_URI_REGEX: LazyLock<Regex> =
37-
LazyLock::new(|| Regex::new(r"^data:([^;,]+)").expect("Invalid regex"));
38-
3935
#[cacheable]
4036
#[derive(PartialEq, Debug, Hash, Eq, Clone, PartialOrd, Ord)]
4137
enum FilenameKind {
@@ -200,14 +196,8 @@ fn render_template(
200196
let mut t = template;
201197
// file-level
202198
if let Some(filename) = options.filename {
203-
if let Some(caps) = DATA_URI_REGEX.captures(filename) {
204-
let ext = mime_guess::get_mime_extensions_str(
205-
caps
206-
.get(1)
207-
.expect("should match mime for data uri")
208-
.as_str(),
209-
)
210-
.map(|exts| exts[0]);
199+
if let Ok(caps) = data_uri(filename) {
200+
let ext = mime_guess::get_mime_extensions_str(caps).map(|exts| exts[0]);
211201

212202
let replacer = options
213203
.content_hash
@@ -347,3 +337,16 @@ fn render_template(
347337
}
348338
t.into_owned()
349339
}
340+
341+
/// Extracts the text that follows a leading `data:` in `input`, stopping before
/// the first `;` or `,` (or at the end of the input if neither occurs).
///
/// `render_template` passes the returned slice to
/// `mime_guess::get_mime_extensions_str` to pick a file extension.
///
/// # Errors
///
/// Returns an error when `input` does not start with `data:` or when nothing
/// precedes the first `;` / `,` after it (e.g. `"data:;ood"`).
fn data_uri(mut input: &str) -> winnow::ModalResult<&str> {
342+
use winnow::{combinator::preceded, prelude::*, token::take_till};
343+
344+
// `1..` requires at least one character before the delimiter.
preceded("data:", take_till(1.., (';', ','))).parse_next(&mut input)
345+
}
346+
347+
// Covers: no delimiter (takes everything after `data:`), a delimiter in the
// middle (stops before it), and an empty segment before the delimiter (rejected).
#[test]
348+
fn test_data_uri() {
349+
assert_eq!(data_uri("data:good").ok(), Some("good"));
350+
assert_eq!(data_uri("data:g;ood").ok(), Some("g"));
351+
assert_eq!(data_uri("data:;ood").ok(), None);
352+
}
Lines changed: 3 additions & 10 deletions
Original file line numberDiff line numberDiff line change
@@ -1,18 +1,12 @@
1-
use std::sync::LazyLock;
2-
3-
use regex::Regex;
4-
5-
static COMMENT_END_REGEX: LazyLock<Regex> =
6-
LazyLock::new(|| Regex::new(r"\*\/").expect("should init regex"));
1+
use cow_utils::CowUtils;
72

83
#[inline]
94
pub fn to_comment(str: &str) -> String {
105
if str.is_empty() {
116
return String::new();
127
}
138

14-
let result = COMMENT_END_REGEX.replace_all(str, "* /");
15-
9+
let result = str.cow_replace("*/", "* /");
1610
format!("/*! {result} */")
1711
}
1812

@@ -22,7 +16,6 @@ pub fn to_comment_with_nl(str: &str) -> String {
2216
return String::new();
2317
}
2418

25-
let result = COMMENT_END_REGEX.replace_all(str, "* /");
26-
19+
let result = str.cow_replace("*/", "* /");
2720
format!("/*! {result} */\n")
2821
}

0 commit comments

Comments
 (0)