@@ -6,8 +6,7 @@ use regex::Regex;
66use rspack_collections:: { DatabaseItem , UkeyMap } ;
77use rspack_core:: incremental:: Mutation ;
88use rspack_core:: {
9- compare_modules_by_identifier, ChunkUkey , Compilation , CompilerOptions , Module , ModuleIdentifier ,
10- DEFAULT_DELIMITER ,
9+ ChunkUkey , Compilation , CompilerOptions , Module , ModuleIdentifier , DEFAULT_DELIMITER ,
1110} ;
1211use rspack_error:: Result ;
1312use rspack_hash:: { RspackHash , RspackHashDigest } ;
@@ -28,17 +27,19 @@ struct Group {
2827 nodes : Vec < GroupItem > ,
2928 pub size : SplitChunkSizes ,
3029 pub key : Option < String > ,
30+ pub similarities : Vec < usize > ,
3131}
3232
// Constructor for `Group`. This block is shown as a diff: a line tagged `N-` is the removed
// version and a line tagged `N+` is its replacement.
3333impl Group {
// Builds a group from `items`.
// `size` is computed here by starting from `SplitChunkSizes::empty()` and adding every item's
// `size` to it. `key` and `similarities` are stored exactly as passed in; the added signature
// below introduces `similarities` as a new third argument.
34- fn new ( items : Vec < GroupItem > , key : Option < String > ) -> Self {
34+ fn new ( items : Vec < GroupItem > , key : Option < String > , similarities : Vec < usize > ) -> Self {
3535 let mut summed_size = SplitChunkSizes :: empty ( ) ;
// Accumulate the size of each item into the running total.
3636 items. iter ( ) . for_each ( |item| summed_size. add_by ( & item. size ) ) ;
3737
3838 Self {
3939 nodes : items,
4040 size : summed_size,
4141 key,
42+ similarities,
4243 }
4344 }
4445}
@@ -84,21 +85,19 @@ fn deterministic_grouping_for_modules(
8485) -> Vec < Group > {
8586 let mut results: Vec < Group > = Default :: default ( ) ;
8687 let module_graph = compilation. get_module_graph ( ) ;
87- let mut items = compilation
88+ let items = compilation
8889 . chunk_graph
8990 . get_chunk_modules ( chunk, & module_graph) ;
90-
91- items. sort_unstable_by ( |a, b| compare_modules_by_identifier ( a, b) ) ;
92-
9391 let context = compilation. options . context . as_ref ( ) ;
9492
9593 let nodes = items. into_iter ( ) . map ( |module| {
9694 let module: & dyn Module = & * * module;
97- let name: String = if module . name_for_condition ( ) . is_some ( ) {
98- make_paths_relative ( context, module . identifier ( ) . as_str ( ) )
95+ let name: String = if let Some ( name_for_condition ) = module . name_for_condition ( ) {
96+ make_paths_relative ( context, & name_for_condition )
9997 } else {
98+ let path = make_paths_relative ( context, module. identifier ( ) . as_str ( ) ) ;
10099 REPLACE_MODULE_IDENTIFIER_REG
101- . replace_all ( & module . identifier ( ) , "" )
100+ . replace_all ( & path , "" )
102101 . to_string ( )
103102 } ;
104103 let key = format ! (
@@ -114,7 +113,7 @@ fn deterministic_grouping_for_modules(
114113 }
115114 } ) ;
116115
117- let initial_nodes = nodes
116+ let mut initial_nodes = nodes
118117 . into_iter ( )
119118 . filter_map ( |node| {
120119 // The Module itself is already bigger than `allow_max_size`, we will create a chunk
@@ -127,16 +126,19 @@ fn deterministic_grouping_for_modules(
127126 allow_max_size
128127 ) ;
129128 let key = node. key . clone ( ) ;
130- results. push ( Group :: new ( vec ! [ node] , Some ( key) ) ) ;
129+ results. push ( Group :: new ( vec ! [ node] , Some ( key) , vec ! [ ] ) ) ;
131130 None
132131 } else {
133132 Some ( node)
134133 }
135134 } )
136135 . collect :: < Vec < _ > > ( ) ;
137136
137+ initial_nodes. sort_by ( |a, b| a. key . cmp ( & b. key ) ) ;
138+
138139 if !initial_nodes. is_empty ( ) {
139- let initial_group = Group :: new ( initial_nodes, None ) ;
140+ let similarities = get_similarities ( & initial_nodes) ;
141+ let initial_group = Group :: new ( initial_nodes, None , similarities) ;
140142
141143 let mut queue = vec ! [ initial_group] ;
142144
@@ -159,16 +161,17 @@ fn deterministic_grouping_for_modules(
159161 left += 1 ;
160162 }
161163
162- let mut right = group. nodes . len ( ) - 2 ;
164+ let mut right: i32 = group. nodes . len ( ) as i32 - 2 ;
163165 let mut right_size = SplitChunkSizes :: empty ( ) ;
164- right_size. add_by ( & group. nodes [ right + 1 ] . size ) ;
165- while right != 0 && right_size. smaller_than ( min_size) {
166- right_size. add_by ( & group. nodes [ right] . size ) ;
166+ right_size. add_by ( & group. nodes [ right as usize + 1 ] . size ) ;
167167
168- right = right. saturating_sub ( 1 ) ;
168+ while right >= 0 && right_size. smaller_than ( min_size) {
169+ right_size. add_by ( & group. nodes [ right as usize ] . size ) ;
170+
171+ right -= 1 ;
169172 }
170173
171- if left - 1 > right {
174+ if left - 1 > right as usize {
172175 // There are overlaps
173176
174177 // TODO(hyf0): There are some algorithms we could do better in this
@@ -182,11 +185,53 @@ fn deterministic_grouping_for_modules(
182185 results. push ( group) ;
183186 continue ;
184187 } else {
188+ let mut pos = left;
189+ let mut best = -1 ;
190+ let mut best_similarity = usize:: MAX ;
191+ right_size = group. nodes . iter ( ) . rev ( ) . take ( group. nodes . len ( ) - pos) . fold (
192+ SplitChunkSizes :: empty ( ) ,
193+ |mut acc, node| {
194+ acc. add_by ( & node. size ) ;
195+ acc
196+ } ,
197+ ) ;
198+
199+ while pos <= right as usize + 1 {
200+ let similarity = group. similarities [ pos - 1 ] ;
201+ if similarity < best_similarity
202+ && left_size. bigger_than ( min_size)
203+ && right_size. bigger_than ( min_size)
204+ {
205+ best_similarity = similarity;
206+ best = pos as i32 ;
207+ }
208+ let size = & group. nodes [ pos] . size ;
209+ left_size. add_by ( size) ;
210+ right_size. subtract_by ( size) ;
211+ pos += 1 ;
212+ }
213+
214+ if best == -1 {
215+ results. push ( group) ;
216+ continue ;
217+ }
218+
219+ left = best as usize ;
220+ right = best - 1 ;
221+
222+ let mut right_similarities = vec ! [ ] ;
223+ for i in right as usize + 2 ..group. nodes . len ( ) {
224+ right_similarities. push ( ( group. similarities ) [ i - 1 ] ) ;
225+ }
226+
227+ let mut left_similarities = vec ! [ ] ;
228+ for i in 1 ..left {
229+ left_similarities. push ( ( group. similarities ) [ i - 1 ] ) ;
230+ }
185231 let right_nodes = group. nodes . split_off ( left) ;
186232 let left_nodes = group. nodes ;
187-
188- queue. push ( Group :: new ( right_nodes, None ) ) ;
189- queue. push ( Group :: new ( left_nodes, None ) ) ;
233+ queue. push ( Group :: new ( right_nodes, None , right_similarities) ) ;
234+ queue. push ( Group :: new ( left_nodes, None , left_similarities) ) ;
190235 }
191236 }
192237 }
@@ -204,6 +249,31 @@ struct ChunkWithSizeInfo<'a> {
204249 pub automatic_name_delimiter : & ' a String ,
205250}
206251
252+ fn get_similarities ( nodes : & [ GroupItem ] ) -> Vec < usize > {
253+ let mut similarities = Vec :: with_capacity ( nodes. len ( ) ) ;
254+ let mut nodes = nodes. iter ( ) ;
255+ let Some ( mut last) = nodes. next ( ) else {
256+ return similarities;
257+ } ;
258+
259+ for node in nodes {
260+ similarities. push ( similarity ( & last. key , & node. key ) ) ;
261+ last = node;
262+ }
263+
264+ similarities
265+ }
266+
/// Scores how alike two strings are by comparing them position by position.
///
/// Characters are paired in order until the shorter string runs out. Each pair
/// contributes `10 - |code point difference|`, floored at zero: identical
/// characters add 10, and characters ten or more code points apart add nothing.
/// The return value is the sum over all pairs, so a larger number means the
/// two strings are more similar.
fn similarity(a: &str, b: &str) -> usize {
  a.chars()
    .zip(b.chars())
    .map(|(ca, cb)| {
      // Work on unsigned code points so no signed subtraction or `abs` is needed.
      let distance = u32::from(ca).abs_diff(u32::from(cb));
      // The per-pair score is at most 10, so widening to `usize` is lossless.
      10u32.saturating_sub(distance) as usize
    })
    .sum()
}
276+
207277impl SplitChunksPlugin {
208278 /// Affected by `splitChunks.minSize`/`splitChunks.cacheGroups.{cacheGroup}.minSize`
209279 #[ tracing:: instrument( skip_all) ]
0 commit comments