Skip to content

Commit 4ff7ebb

Browse files
committed
Corrected panic on UTF-8 searches.
1 parent 3ffd699 commit 4ff7ebb

File tree

9 files changed

+87
-20
lines changed

9 files changed

+87
-20
lines changed

CHANGELOG.md

Lines changed: 2 additions & 0 deletions
Original file line number | Diff line number | Diff line change
@@ -3,6 +3,8 @@
33
* Release notes are available on
44
[GitHub](https://github.com/leontoeides/indicium/releases).
55

6+
* `0.6.2`: Corrected [panic on UTF-8 searches](https://github.com/leontoeides/indicium/issues/2).
7+
68
* `0.6.1`: Removed `eddie` as the default string similarity crate, for now, due
79
to a potential `panic`.
810

Cargo.toml

Lines changed: 2 additions & 2 deletions
Original file line number | Diff line number | Diff line change
@@ -1,6 +1,6 @@
11
[package]
22
name = "indicium"
3-
version = "0.6.1"
3+
version = "0.6.2"
44
authors = ["Dylan Bowker <dylan.bowker@arkiteq.ca>"]
55
edition = "2021"
66
categories = [ "database-implementations" ]
@@ -17,7 +17,7 @@ rust-version = "1.62.1"
1717
default = [ "simple", "strsim", "ahash" ]
1818
simple = []
1919
select2 = [ "simple", "serde" ]
20-
fuzzy = [ "eddie" ] # Deprecated feature. Redirects to `eddie` feature.
20+
fuzzy = [ "strsim" ] # Deprecated feature. Redirects to `strsim` feature.
2121
ahash = [ "dep:ahash" ]
2222
eddie = [ "dep:eddie" ]
2323
gxhash = [ "dep:gxhash" ]

src/simple/internal/eddie/eddie_context_autocomplete.rs

Lines changed: 13 additions & 3 deletions
Original file line number | Diff line number | Diff line change
@@ -46,9 +46,19 @@ impl<K: Hash + Ord> SearchIndex<K> {
4646
// The user keyword must be at least as long as the match length to be
4747
// evaluated for fuzzy-matches:
4848
if user_keyword.len() >= self.fuzzy_length {
49-
// Use the first _n_ characters of the user's keyword to find
50-
// search index keywords to compare against:
51-
&user_keyword[0..self.fuzzy_length]
49+
// Get the byte index of the _n_th character:
50+
let byte_index: Option<usize> = user_keyword
51+
.char_indices()
52+
.take(self.fuzzy_length)
53+
.map(|(idx, _ch)| idx)
54+
.max();
55+
// Use the first _n_ characters of the user's keyword. These
56+
// first characters are used to find search index keywords to
57+
// fuzzy match against:
58+
match byte_index {
59+
Some(byte_index) => &user_keyword[0..byte_index],
60+
None => return vec![],
61+
} // match
5262
} else {
5363
// The user's keyword is too short. Do not perform any fuzzy
5464
// matching:

src/simple/internal/eddie/eddie_global_autocomplete.rs

Lines changed: 13 additions & 3 deletions
Original file line number | Diff line number | Diff line change
@@ -45,9 +45,19 @@ impl<K: Hash + Ord> SearchIndex<K> {
4545
// The user keyword must be at least as long as the match length to be
4646
// evaluated for fuzzy-matches:
4747
if user_keyword.len() >= self.fuzzy_length {
48-
// Use the first _n_ characters of the user's keyword to find
49-
// search index keywords to compare against:
50-
&user_keyword[0..self.fuzzy_length]
48+
// Get the byte index of the _n_th character:
49+
let byte_index: Option<usize> = user_keyword
50+
.char_indices()
51+
.take(self.fuzzy_length)
52+
.map(|(idx, _ch)| idx)
53+
.max();
54+
// Use the first _n_ characters of the user's keyword. These
55+
// first characters are used to find search index keywords to
56+
// fuzzy match against:
57+
match byte_index {
58+
Some(byte_index) => &user_keyword[0..byte_index],
59+
None => return vec![],
60+
} // match
5161
} else {
5262
// The user's keyword is too short. Do not perform any fuzzy
5363
// matching:

src/simple/internal/eddie/eddie_global_keyword.rs

Lines changed: 13 additions & 3 deletions
Original file line number | Diff line number | Diff line change
@@ -41,9 +41,19 @@ impl<K: Hash + Ord> SearchIndex<K> {
4141
// The user keyword must be at least as long as the match length to be
4242
// evaluated for fuzzy-matches:
4343
if user_keyword.len() >= self.fuzzy_length {
44-
// Use the first _n_ characters of the user's keyword to find
45-
// search index keywords to compare against:
46-
&user_keyword[0..self.fuzzy_length]
44+
// Get the byte index of the _n_th character:
45+
let byte_index: Option<usize> = user_keyword
46+
.char_indices()
47+
.take(self.fuzzy_length)
48+
.map(|(idx, _ch)| idx)
49+
.max();
50+
// Use the first _n_ characters of the user's keyword. These
51+
// first characters are used to find search index keywords to
52+
// fuzzy match against:
53+
match byte_index {
54+
Some(byte_index) => &user_keyword[0..byte_index],
55+
None => return vec![],
56+
} // match
4757
} else {
4858
// The user's keyword is too short. Do not perform any fuzzy
4959
// matching:

src/simple/internal/strsim/strsim_context_autocomplete.rs

Lines changed: 13 additions & 3 deletions
Original file line number | Diff line number | Diff line change
@@ -46,9 +46,19 @@ impl<K: Hash + Ord> SearchIndex<K> {
4646
// The user keyword must be at least as long as the match length to be
4747
// evaluated for fuzzy-matches:
4848
if user_keyword.len() >= self.fuzzy_length {
49-
// Use the first _n_ characters of the user's keyword to find
50-
// search index keywords to compare against:
51-
&user_keyword[0..self.fuzzy_length]
49+
// Get the byte index of the _n_th character:
50+
let byte_index: Option<usize> = user_keyword
51+
.char_indices()
52+
.take(self.fuzzy_length)
53+
.map(|(idx, _ch)| idx)
54+
.max();
55+
// Use the first _n_ characters of the user's keyword. These
56+
// first characters are used to find search index keywords to
57+
// fuzzy match against:
58+
match byte_index {
59+
Some(byte_index) => &user_keyword[0..byte_index],
60+
None => return vec![],
61+
} // match
5262
} else {
5363
// The user's keyword is too short. Do not perform any fuzzy
5464
// matching:

src/simple/internal/strsim/strsim_global_autocomplete.rs

Lines changed: 13 additions & 3 deletions
Original file line number | Diff line number | Diff line change
@@ -45,9 +45,19 @@ impl<K: Hash + Ord> SearchIndex<K> {
4545
// The user keyword must be at least as long as the match length to be
4646
// evaluated for fuzzy-matches:
4747
if user_keyword.len() >= self.fuzzy_length {
48-
// Use the first _n_ characters of the user's keyword to find
49-
// search index keywords to compare against:
50-
&user_keyword[0..self.fuzzy_length]
48+
// Get the byte index of the _n_th character:
49+
let byte_index: Option<usize> = user_keyword
50+
.char_indices()
51+
.take(self.fuzzy_length)
52+
.map(|(idx, _ch)| idx)
53+
.max();
54+
// Use the first _n_ characters of the user's keyword. These
55+
// first characters are used to find search index keywords to
56+
// fuzzy match against:
57+
match byte_index {
58+
Some(byte_index) => &user_keyword[0..byte_index],
59+
None => return vec![],
60+
} // match
5161
} else {
5262
// The user's keyword is too short. Do not perform any fuzzy
5363
// matching:

src/simple/internal/strsim/strsim_global_keyword.rs

Lines changed: 13 additions & 3 deletions
Original file line number | Diff line number | Diff line change
@@ -41,9 +41,19 @@ impl<K: Hash + Ord> SearchIndex<K> {
4141
// The user keyword must be at least as long as the match length to be
4242
// evaluated for fuzzy-matches:
4343
if user_keyword.len() >= self.fuzzy_length {
44-
// Use the first _n_ characters of the user's keyword to find
45-
// search index keywords to compare against:
46-
&user_keyword[0..self.fuzzy_length]
44+
// Get the byte index of the _n_th character:
45+
let byte_index: Option<usize> = user_keyword
46+
.char_indices()
47+
.take(self.fuzzy_length)
48+
.map(|(idx, _ch)| idx)
49+
.max();
50+
// Use the first _n_ characters of the user's keyword. These
51+
// first characters are used to find search index keywords to
52+
// fuzzy match against:
53+
match byte_index {
54+
Some(byte_index) => &user_keyword[0..byte_index],
55+
None => return None,
56+
} // match
4757
} else {
4858
// The user's keyword is too short. Do not perform any fuzzy
4959
// matching:

src/simple/tests.rs

Lines changed: 5 additions & 0 deletions
Original file line number | Diff line number | Diff line change
@@ -291,4 +291,9 @@ fn simple() {
291291
search_index.autocomplete_type(&AutocompleteType::Context, "stars are dancers");
292292
#[cfg(any(feature = "eddie", feature = "strsim"))]
293293
assert_eq!(autocomplete_options, vec!["stars are dancing".to_string()]);
294+
295+
// Test UTF-8:
296+
let index = crate::simple::SearchIndex::<usize>::default();
297+
index.search("лол"); // lol in Cyrillic
298+
294299
} // fn

0 commit comments

Comments
 (0)