@@ -486,9 +486,9 @@ count: int = sz.count("haystack", "needle", start=0, end=sys.maxsize, allowoverl
486486### Edit Distances
487487
488488``` py
489- assert sz.edit_distance (" apple" , " aple" ) == 1 # skip one ASCII character
490- assert sz.edit_distance (" αβγδ" , " αγδ" ) == 2 # skip two bytes forming one rune
491- assert sz.edit_distance_unicode (" αβγδ" , " αγδ" ) == 1 # one unicode rune
489+ assert sz.levenshtein_distance (" apple" , " aple" ) == 1 # skip one ASCII character
490+ assert sz.levenshtein_distance (" αβγδ" , " αγδ" ) == 2 # skip two bytes forming one rune
491+ assert sz.levenshtein_distance_unicode (" αβγδ" , " αγδ" ) == 1 # one unicode rune
492492```
493493
494494Several Python libraries provide edit distance computation.
@@ -513,7 +513,7 @@ costs = np.zeros((256, 256), dtype=np.int8)
513513costs.fill(- 1 )
514514np.fill_diagonal(costs, 0 )
515515
516- assert sz.alignment_score(" first" , " second" , substitution_matrix = costs, gap_score = - 1 ) == - sz.edit_distance (a, b)
516+ assert sz.alignment_score(" first" , " second" , substitution_matrix = costs, gap_score = - 1 ) == - sz.levenshtein_distance (" first" , " second" )
517517```
518518
519519Using the same proteins as for Levenshtein distance benchmarks:
@@ -1088,8 +1088,8 @@ Standard library functions may not offer the most efficient or convenient method
10881088- ` haystack.replace_all(sz::byteset(""), replacement_string) `
10891089- ` haystack.try_replace_all(needle_string, replacement_string) `
10901090- ` haystack.try_replace_all(sz::byteset(""), replacement_string) `
1091- - ` haystack.transform (sz::look_up_table::identity()) `
1092- - ` haystack.transform (sz::look_up_table::identity(), haystack.data()) `
1091+ - ` haystack.lookup (sz::look_up_table::identity()) `
1092+ - ` haystack.lookup (sz::look_up_table::identity(), haystack.data()) `
10931093
10941094### Levenshtein Edit Distance and Alignment Scores
10951095
@@ -1103,8 +1103,8 @@ sz::hamming_distance(first, second[, upper_bound]) -> std::size_t;
11031103sz::hamming_distance_utf8(first, second[ , upper_bound] ) -> std::size_t;
11041104
11051105// Count number of insertions, deletions and substitutions
1106- sz::edit_distance (first, second[ , upper_bound[ , allocator]] ) -> std::size_t;
1107- sz::edit_distance_utf8 (first, second[ , upper_bound[ , allocator]] ) -> std::size_t;
1106+ sz::levenshtein_distance (first, second[ , upper_bound[ , allocator]] ) -> std::size_t;
1107+ sz::levenshtein_distance_utf8 (first, second[ , upper_bound[ , allocator]] ) -> std::size_t;
11081108
11091109// Substitution-parametrized Needleman-Wunsch global alignment score
11101110std::int8_t costs[ 256] [ 256 ] ; // Substitution costs matrix
@@ -1160,8 +1160,8 @@ The performance of those containers is often limited by the performance of the s
11601160StringZilla can be used to accelerate containers with ` std::string ` keys, by overriding the default comparator and hash functions.
11611161
11621162``` cpp
1163- std::map<std::string, int , sz::string_view_less > sorted_words;
1164- std::unordered_map<std::string, int , sz::string_view_hash , sz::string_view_equal_to > words;
1163+ std::map<std::string, int , sz::less > sorted_words;
1164+ std::unordered_map<std::string, int , sz::hash , sz::equal_to > words;
11651165```
11661166
11671167Alternatively, a better approach would be to use the ` sz::string ` class as a key.
@@ -1278,19 +1278,19 @@ assert_eq!(my_str.sz_find("world"), Some(7));
12781278assert_eq! (my_cow_str . as_ref (). sz_find (" world" ), Some (7 ));
12791279```
12801280
1281- The library also exposes Levenshtein and Hamming edit-distances for byte-arrays and UTF-8 strings, as well as Needleman-Wunch alignment scores.
1281+ The library also exposes Levenshtein and Hamming edit-distances for byte-arrays and UTF-8 strings, as well as Needleman-Wunsch alignment scores.
12821282
12831283``` rust
12841284use stringzilla :: sz;
12851285
12861286// Handling arbitrary byte arrays:
1287- sz :: edit_distance (" Hello, world!" , " Hello, world?" ); // 1
1287+ sz :: levenshtein_distance (" Hello, world!" , " Hello, world?" ); // 1
12881288sz :: hamming_distance (" Hello, world!" , " Hello, world?" ); // 1
12891289sz :: alignment_score (" Hello, world!" , " Hello, world?" , sz :: unary_substitution_costs (), - 1 ); // -1
12901290
12911291// Handling UTF-8 strings:
12921292sz :: hamming_distance_utf8 (" αβγδ" , " αγγδ" ) // 1
1293- sz :: edit_distance_utf8 (" façade" , " facade" ) // 1
1293+ sz :: levenshtein_distance_utf8 (" façade" , " facade" ) // 1
12941294```
12951295
12961296[ memchr-benchmarks ] : https://github.com/ashvardanian/memchr_vs_stringzilla
@@ -1465,7 +1465,7 @@ In AVX-512, StringZilla uses non-temporal stores to avoid cache pollution, when
14651465Moreover, it handles the unaligned head and the tails of the ` target ` buffer separately, ensuring that writes in big copies are always aligned to cache-line boundaries.
14661466That's true for both AVX2 and AVX-512 backends.
14671467
1468- StringZilla also contains "drafts" of smarter, but less efficient algorithms, that minimize the number of unaligned loads, perfoming shuffles and permutations.
1468+ StringZilla also contains "drafts" of smarter but less efficient algorithms that minimize the number of unaligned loads by performing shuffles and permutations instead.
14691469That's a topic for future research, as the performance gains are not yet satisfactory.
14701470
14711471> § Reading materials.
0 commit comments