Skip to content

Commit d476fda

Browse files
authored
feat: split big chunks based on modules path similarities (#8775)
1 parent d174ed6 commit d476fda

File tree

17 files changed

+8847
-22
lines changed

17 files changed

+8847
-22
lines changed

crates/rspack_plugin_split_chunks/src/common.rs

Lines changed: 4 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -122,6 +122,10 @@ impl SplitChunkSizes {
122122
pub fn add_by(&mut self, other: &Self) {
123123
self.combine_with(other, &|a, b| a + b)
124124
}
125+
126+
/// Subtracts the sizes in `other` from `self`, in place.
///
/// Mirror image of `add_by`: delegates to `combine_with`, passing `a - b` as the rule
/// for merging a pair of sizes (`a` comes from `self`, `b` from `other`).
///
/// NOTE(review): `combine_with` is not visible here. Confirm how it treats an entry
/// that exists only in `other`; if it simply copies such entries over, they would be
/// added rather than subtracted. Callers presumably only subtract sizes that were
/// previously added to `self` -- verify against call sites.
pub fn subtract_by(&mut self, other: &Self) {
    self.combine_with(other, &|a, b| a - b)
}
125129
}
126130

127131
impl Deref for SplitChunkSizes {

crates/rspack_plugin_split_chunks/src/plugin/max_size.rs

Lines changed: 92 additions & 22 deletions
Original file line numberDiff line numberDiff line change
@@ -6,8 +6,7 @@ use regex::Regex;
66
use rspack_collections::{DatabaseItem, UkeyMap};
77
use rspack_core::incremental::Mutation;
88
use rspack_core::{
9-
compare_modules_by_identifier, ChunkUkey, Compilation, CompilerOptions, Module, ModuleIdentifier,
10-
DEFAULT_DELIMITER,
9+
ChunkUkey, Compilation, CompilerOptions, Module, ModuleIdentifier, DEFAULT_DELIMITER,
1110
};
1211
use rspack_error::Result;
1312
use rspack_hash::{RspackHash, RspackHashDigest};
@@ -28,17 +27,19 @@ struct Group {
2827
nodes: Vec<GroupItem>,
2928
pub size: SplitChunkSizes,
3029
pub key: Option<String>,
30+
pub similarities: Vec<usize>,
3131
}
3232

3333
impl Group {
34-
fn new(items: Vec<GroupItem>, key: Option<String>) -> Self {
34+
fn new(items: Vec<GroupItem>, key: Option<String>, similarities: Vec<usize>) -> Self {
3535
let mut summed_size = SplitChunkSizes::empty();
3636
items.iter().for_each(|item| summed_size.add_by(&item.size));
3737

3838
Self {
3939
nodes: items,
4040
size: summed_size,
4141
key,
42+
similarities,
4243
}
4344
}
4445
}
@@ -84,21 +85,19 @@ fn deterministic_grouping_for_modules(
8485
) -> Vec<Group> {
8586
let mut results: Vec<Group> = Default::default();
8687
let module_graph = compilation.get_module_graph();
87-
let mut items = compilation
88+
let items = compilation
8889
.chunk_graph
8990
.get_chunk_modules(chunk, &module_graph);
90-
91-
items.sort_unstable_by(|a, b| compare_modules_by_identifier(a, b));
92-
9391
let context = compilation.options.context.as_ref();
9492

9593
let nodes = items.into_iter().map(|module| {
9694
let module: &dyn Module = &**module;
97-
let name: String = if module.name_for_condition().is_some() {
98-
make_paths_relative(context, module.identifier().as_str())
95+
let name: String = if let Some(name_for_condition) = module.name_for_condition() {
96+
make_paths_relative(context, &name_for_condition)
9997
} else {
98+
let path = make_paths_relative(context, module.identifier().as_str());
10099
REPLACE_MODULE_IDENTIFIER_REG
101-
.replace_all(&module.identifier(), "")
100+
.replace_all(&path, "")
102101
.to_string()
103102
};
104103
let key = format!(
@@ -114,7 +113,7 @@ fn deterministic_grouping_for_modules(
114113
}
115114
});
116115

117-
let initial_nodes = nodes
116+
let mut initial_nodes = nodes
118117
.into_iter()
119118
.filter_map(|node| {
120119
// The Module itself is already bigger than `allow_max_size`, we will create a chunk
@@ -127,16 +126,19 @@ fn deterministic_grouping_for_modules(
127126
allow_max_size
128127
);
129128
let key = node.key.clone();
130-
results.push(Group::new(vec![node], Some(key)));
129+
results.push(Group::new(vec![node], Some(key), vec![]));
131130
None
132131
} else {
133132
Some(node)
134133
}
135134
})
136135
.collect::<Vec<_>>();
137136

137+
initial_nodes.sort_by(|a, b| a.key.cmp(&b.key));
138+
138139
if !initial_nodes.is_empty() {
139-
let initial_group = Group::new(initial_nodes, None);
140+
let similarities = get_similarities(&initial_nodes);
141+
let initial_group = Group::new(initial_nodes, None, similarities);
140142

141143
let mut queue = vec![initial_group];
142144

@@ -159,16 +161,17 @@ fn deterministic_grouping_for_modules(
159161
left += 1;
160162
}
161163

162-
let mut right = group.nodes.len() - 2;
164+
let mut right: i32 = group.nodes.len() as i32 - 2;
163165
let mut right_size = SplitChunkSizes::empty();
164-
right_size.add_by(&group.nodes[right + 1].size);
165-
while right != 0 && right_size.smaller_than(min_size) {
166-
right_size.add_by(&group.nodes[right].size);
166+
right_size.add_by(&group.nodes[right as usize + 1].size);
167167

168-
right = right.saturating_sub(1);
168+
while right >= 0 && right_size.smaller_than(min_size) {
169+
right_size.add_by(&group.nodes[right as usize].size);
170+
171+
right -= 1;
169172
}
170173

171-
if left - 1 > right {
174+
if left - 1 > right as usize {
172175
// There are overlaps
173176

174177
// TODO(hyf0): There are some algorithms we could do better in this
@@ -182,11 +185,53 @@ fn deterministic_grouping_for_modules(
182185
results.push(group);
183186
continue;
184187
} else {
188+
let mut pos = left;
189+
let mut best = -1;
190+
let mut best_similarity = usize::MAX;
191+
right_size = group.nodes.iter().rev().take(group.nodes.len() - pos).fold(
192+
SplitChunkSizes::empty(),
193+
|mut acc, node| {
194+
acc.add_by(&node.size);
195+
acc
196+
},
197+
);
198+
199+
while pos <= right as usize + 1 {
200+
let similarity = group.similarities[pos - 1];
201+
if similarity < best_similarity
202+
&& left_size.bigger_than(min_size)
203+
&& right_size.bigger_than(min_size)
204+
{
205+
best_similarity = similarity;
206+
best = pos as i32;
207+
}
208+
let size = &group.nodes[pos].size;
209+
left_size.add_by(size);
210+
right_size.subtract_by(size);
211+
pos += 1;
212+
}
213+
214+
if best == -1 {
215+
results.push(group);
216+
continue;
217+
}
218+
219+
left = best as usize;
220+
right = best - 1;
221+
222+
let mut right_similarities = vec![];
223+
for i in right as usize + 2..group.nodes.len() {
224+
right_similarities.push((group.similarities)[i - 1]);
225+
}
226+
227+
let mut left_similarities = vec![];
228+
for i in 1..left {
229+
left_similarities.push((group.similarities)[i - 1]);
230+
}
185231
let right_nodes = group.nodes.split_off(left);
186232
let left_nodes = group.nodes;
187-
188-
queue.push(Group::new(right_nodes, None));
189-
queue.push(Group::new(left_nodes, None));
233+
queue.push(Group::new(right_nodes, None, right_similarities));
234+
queue.push(Group::new(left_nodes, None, left_similarities));
190235
}
191236
}
192237
}
@@ -204,6 +249,31 @@ struct ChunkWithSizeInfo<'a> {
204249
pub automatic_name_delimiter: &'a String,
205250
}
206251

252+
fn get_similarities(nodes: &[GroupItem]) -> Vec<usize> {
253+
let mut similarities = Vec::with_capacity(nodes.len());
254+
let mut nodes = nodes.iter();
255+
let Some(mut last) = nodes.next() else {
256+
return similarities;
257+
};
258+
259+
for node in nodes {
260+
similarities.push(similarity(&last.key, &node.key));
261+
last = node;
262+
}
263+
264+
similarities
265+
}
266+
267+
/// Scores how alike two keys are by comparing them character by character.
///
/// Characters are paired positionally until the shorter string runs out; any
/// remaining characters of the longer string are ignored. Each pair contributes
/// `10 - |code point difference|`, floored at zero: identical characters add 10,
/// adjacent code points add 9, and characters ten or more code points apart add
/// nothing. A larger total means the keys are more alike.
fn similarity(a: &str, b: &str) -> usize {
    a.chars()
        .zip(b.chars())
        .map(|(ca, cb)| {
            // `char as u32` is lossless; staying unsigned avoids any signed
            // intermediate and the sign-changing cast back to `usize`.
            let gap = (ca as u32).abs_diff(cb as u32);
            // At most 10, so widening to `usize` cannot lose information.
            10u32.saturating_sub(gap) as usize
        })
        .sum()
}
276+
207277
impl SplitChunksPlugin {
208278
/// Affected by `splitChunks.minSize`/`splitChunks.cacheGroups.{cacheGroup}.minSize`
209279
#[tracing::instrument(skip_all)]
Lines changed: 22 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,22 @@
1+
/** @type {import("@rspack/core").Configuration} */
2+
module.exports = {
3+
target: 'node',
4+
entry: "./src/index.js",
5+
output: {
6+
filename: '[name].js'
7+
},
8+
optimization: {
9+
chunkIds: 'named',
10+
moduleIds: 'named',
11+
splitChunks: {
12+
chunks: "all",
13+
cacheGroups: {
14+
fragment: {
15+
minChunks: 1,
16+
maxSize: 200 * 1024,
17+
priority: 10,
18+
}
19+
}
20+
}
21+
}
22+
};

0 commit comments

Comments
 (0)