@@ -319,8 +319,7 @@ where
319319 }
320320
321321 // strip common suffix
322- let source = & source[ ..source. len ( ) - end] ;
323- let target = & target[ ..target. len ( ) - end] ;
322+ let ( source, target) = ( & source[ ..source. len ( ) - end] , & target[ ..target. len ( ) - end] ) ;
324323
325324 let mut start = source
326325 . bytes ( )
@@ -334,8 +333,7 @@ where
334333 }
335334
336335 // strip common prefix
337- let source = & source[ start..] ;
338- let target = & target[ start..] ;
336+ let ( source, target) = ( & source[ start..] , & target[ start..] ) ;
339337
340338 // -- [the adapted code from C++ starts here] ---
341339
@@ -347,6 +345,23 @@ where
347345 return source. chars ( ) . count ( ) ;
348346 }
349347
348+ // --- [this is not part of the adapted code from C++] ---
349+ // micro-optimization: the `costs` vector holds one entry per char of
350+ // `target` (plus one), so we try to reduce the allocation size by
351+ // assigning the shorter string (by byte length) to `target`.
352+ // the actual saving depends on the chars composing each string:
353+ // we optimistically bet that a string made up of fewer bytes also
354+ // contains fewer chars.
355+ // this is not always true, and in those cases we could even end up
356+ // allocating more, but they should be rare enough to justify this
357+ // optimization. swapping is safe because the distance is symmetric.
358+ let ( source, target) = if source. len ( ) < target. len ( ) {
359+ ( target, source)
360+ } else {
361+ ( source, target)
362+ } ;
363+ // --------------------------------------------------------------------
364+
350365 let target_len = target. chars ( ) . count ( ) ;
351366 let mut costs = ( 0 ..=target_len) . collect :: < Vec < _ > > ( ) ;
352367
@@ -1161,7 +1176,7 @@ mod tests {
11611176
11621177 #[ test]
11631178 fn levenshtein_distance ( ) {
1164- const SEED : [ ( & str , & str , usize ) ; 15 ] = [
1179+ const SEED : [ ( & str , & str , usize ) ; 17 ] = [
11651180 ( "" , "" , 0 ) ,
11661181 ( "" , "a" , 1 ) ,
11671182 ( "a" , "" , 1 ) ,
@@ -1177,6 +1192,8 @@ mod tests {
11771192 ( "agĀin" , "agāin" , 1 ) ,
11781193 ( "cafexĀ" , "cafeyȀ" , 2 ) ,
11791194 ( "Āxcafe" , "Ȁycafe" , 2 ) ,
1195+ ( "lorem ipsum dolor" , "ipsum" , 12 ) ,
1196+ ( "ipsum" , "lorem ipsum dolor" , 12 ) ,
11801197 ] ;
11811198
11821199 for ( sut, other, expected) in SEED {
0 commit comments