Skip to content

Commit b058c55

Browse files
committed
optimizations + lower msrv
- optimize memory allocations in the Levenshtein distance computation - lower the minimum supported Rust version (MSRV)
1 parent 2800e77 commit b058c55

File tree

3 files changed

+25
-8
lines changed

3 files changed

+25
-8
lines changed

Cargo.lock

Lines changed: 1 addition & 1 deletion
Some generated files are not rendered by default. Learn more about customizing how changed files appear on GitHub.

Cargo.toml

Lines changed: 2 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -2,11 +2,11 @@
22
authors = ["Davide Di Carlo <daddinuz@gmail.com>"]
33
description = "Extension traits for `String` and `&str` types."
44
name = "string_more"
5-
version = "0.2.0"
5+
version = "0.2.1"
66
edition = "2021"
77
license = "MIT"
88
keywords = ["String", "str", "extension", "in-place", "edit-distance"]
9-
rust-version = "1.80"
9+
rust-version = "1.60"
1010
repository = "https://github.com/daddinuz/string_more"
1111

1212
[dependencies]

src/lib.rs

Lines changed: 22 additions & 5 deletions
Original file line numberDiff line numberDiff line change
@@ -319,8 +319,7 @@ where
319319
}
320320

321321
// strip common suffix
322-
let source = &source[..source.len() - end];
323-
let target = &target[..target.len() - end];
322+
let (source, target) = (&source[..source.len() - end], &target[..target.len() - end]);
324323

325324
let mut start = source
326325
.bytes()
@@ -334,8 +333,7 @@ where
334333
}
335334

336335
// strip common prefix
337-
let source = &source[start..];
338-
let target = &target[start..];
336+
let (source, target) = (&source[start..], &target[start..]);
339337

340338
// -- [the adapted code from C++ starts here] ---
341339

@@ -347,6 +345,23 @@ where
347345
return source.chars().count();
348346
}
349347

348+
// --- [this is not part of the adapted code from C++] ---
349+
// micro optimization: the `costs` vector holds one entry per char of
350+
// target (plus one), so we try to shrink that allocation by assigning
351+
// the shorter string (by byte length) to target; swapping is safe
352+
// because the edit distance is symmetric.
353+
// the actual saving depends on the chars composing each string: we
354+
// optimistically bet that fewer bytes means fewer chars.
355+
// this is not always true, and in those cases we could even end up
356+
// allocating more, but they should be rare enough to
357+
// justify this optimization.
358+
let (source, target) = if source.len() < target.len() {
359+
(target, source)
360+
} else {
361+
(source, target)
362+
};
363+
// --------------------------------------------------------------------
364+
350365
let target_len = target.chars().count();
351366
let mut costs = (0..=target_len).collect::<Vec<_>>();
352367

@@ -1161,7 +1176,7 @@ mod tests {
11611176

11621177
#[test]
11631178
fn levenshtein_distance() {
1164-
const SEED: [(&str, &str, usize); 15] = [
1179+
const SEED: [(&str, &str, usize); 17] = [
11651180
("", "", 0),
11661181
("", "a", 1),
11671182
("a", "", 1),
@@ -1177,6 +1192,8 @@ mod tests {
11771192
("agĀin", "agāin", 1),
11781193
("cafexĀ", "cafeyȀ", 2),
11791194
("Āxcafe", "Ȁycafe", 2),
1195+
("lorem ipsum dolor", "ipsum", 12),
1196+
("ipsum", "lorem ipsum dolor", 12),
11801197
];
11811198

11821199
for (sut, other, expected) in SEED {

0 commit comments

Comments
 (0)