Skip to content

Commit 72f82b6

Browse files
committed
Add lcp_array, z_algorithm
1 parent 86ed9cc commit 72f82b6

File tree

2 files changed

+106
-1
lines changed

2 files changed

+106
-1
lines changed

src/lib.rs

Lines changed: 4 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -18,4 +18,7 @@ pub(crate) mod internal_scc;
1818
pub(crate) mod internal_type_traits;
1919

2020
pub use fenwicktree::FenwickTree;
21-
pub use string::{suffix_array, suffix_array_arbitrary, suffix_array_manual};
21+
pub use string::{
22+
lcp_array, lcp_array_arbitrary, suffix_array, suffix_array_arbitrary, suffix_array_manual,
23+
z_algorithm, z_algorithm_arbitrary,
24+
};

src/string.rs

Lines changed: 102 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -241,6 +241,78 @@ pub fn suffix_array(s: impl IntoIterator<Item = char>) -> Vec<usize> {
241241
sa_is::<DefaultThreshold>(&s2, 255)
242242
}
243243

244+
// Reference:
245+
// T. Kasai, G. Lee, H. Arimura, S. Arikawa, and K. Park,
246+
// Linear-Time Longest-Common-Prefix Computation in Suffix Arrays and Its
247+
// Applications
248+
/// Computes the LCP (longest common prefix) array of `s` given its suffix array `sa`.
///
/// Entry `k` of the result is the length of the longest common prefix of the
/// suffixes starting at `sa[k]` and `sa[k + 1]`, so the result has
/// `s.len() - 1` entries. The values are only meaningful when `sa` really is
/// the suffix array of `s`.
///
/// # Panics
///
/// Panics if `s` is empty, if `sa.len() != s.len()`, or if `sa` contains an
/// index that is out of range for `s`.
pub fn lcp_array_arbitrary<T: Ord>(s: &[T], sa: &[usize]) -> Vec<usize> {
    let n = s.len();
    assert!(n >= 1);
    // Fail early with a clear message instead of an opaque index panic (short
    // `sa`) or silently ignoring trailing entries (long `sa`).
    assert_eq!(
        sa.len(),
        n,
        "suffix array length must equal the input length"
    );

    // rnk[p] = position of the suffix starting at `p` inside `sa`.
    let mut rnk = vec![0; n];
    for (i, &p) in sa.iter().enumerate() {
        rnk[p] = i;
    }

    let mut lcp = vec![0; n - 1];
    // Kasai's invariant: moving from suffix `i` to suffix `i + 1` shortens the
    // previously found common prefix by at most one element.
    let mut h = 0usize;
    // The last suffix (start `n - 1`) is one element long; for a valid suffix
    // array nothing sorted before it can share its first element, so its slot
    // keeps the initial 0 and the loop may stop one short.
    for i in 0..n - 1 {
        h = h.saturating_sub(1);
        if rnk[i] == 0 {
            // Suffix `i` is the smallest one: it has no predecessor in `sa`.
            continue;
        }
        let j = sa[rnk[i] - 1];
        while j + h < n && i + h < n && s[j + h] == s[i + h] {
            h += 1;
        }
        lcp[rnk[i] - 1] = h;
    }
    lcp
}
275+
276+
pub fn lcp_array(s: &str, sa: &[usize]) -> Vec<usize> {
277+
let s: &[u8] = s.as_bytes();
278+
lcp_array_arbitrary(s, sa)
279+
}
280+
281+
// Reference:
282+
// D. Gusfield,
283+
// Algorithms on Strings, Trees, and Sequences: Computer Science and
284+
// Computational Biology
285+
/// Computes the Z-array of `s`.
///
/// Entry `i` of the result is the length of the longest common prefix of `s`
/// and `s[i..]`; entry `0` is therefore `s.len()`. An empty input yields an
/// empty vector.
pub fn z_algorithm_arbitrary<T: Ord>(s: &[T]) -> Vec<usize> {
    let len = s.len();
    let mut z = vec![0; len];
    if len == 0 {
        return z;
    }

    // `left` is the start of the already-matched window that reaches furthest
    // to the right; z[0] stays 0 during the scan so the window starts empty.
    let mut left = 0;
    for pos in 1..len {
        let right = left + z[left];
        // Reuse what the window already proves, capped at the window's end.
        let mut matched = if right > pos {
            (right - pos).min(z[pos - left])
        } else {
            0
        };
        while pos + matched < len && s[matched] == s[pos + matched] {
            matched += 1;
        }
        z[pos] = matched;
        if right < pos + matched {
            left = pos;
        }
    }

    // The whole sequence trivially matches itself.
    z[0] = len;
    z
}
310+
311+
pub fn z_algorithm(s: &str) -> Vec<usize> {
312+
let s: &[u8] = s.as_bytes();
313+
z_algorithm_arbitrary(s)
314+
}
315+
244316
#[cfg(test)]
245317
mod tests {
246318
use super::*;
@@ -286,4 +358,34 @@ mod tests {
286358
let str = "mmiissiissiippii"; // an example taken from https://mametter.hatenablog.com/entry/20180130/p1
287359
verify_all(str, &[15, 14, 10, 6, 2, 11, 7, 3, 1, 0, 13, 12, 9, 5, 8, 4]);
288360
}
361+
362+
#[test]
fn test_lcp_0() {
    // LCP array of a short ASCII word, built from the library's own suffix array.
    let text = "abracadabra";
    let sa = suffix_array(text.chars());
    assert_eq!(lcp_array(text, &sa), &[1, 4, 1, 1, 0, 3, 0, 0, 0, 2]);
}
369+
370+
#[test]
fn test_lcp_1() {
    // an example taken from https://mametter.hatenablog.com/entry/20180130/p1
    let text = "mmiissiissiippii";
    let sa = suffix_array(text.chars());
    assert_eq!(
        lcp_array(text, &sa),
        &[1, 2, 2, 6, 1, 1, 5, 0, 1, 0, 1, 0, 3, 1, 4]
    );
}
377+
378+
#[test]
fn test_z_0() {
    // Z-array of a word with a repeated "abra" prefix/suffix.
    let text = "abracadabra";
    let z = z_algorithm(text);
    assert_eq!(z, &[11, 0, 0, 1, 0, 1, 0, 4, 0, 0, 1]);
}
384+
385+
#[test]
fn test_z_1() {
    // Z-array of a periodic string: every even offset matches to the end.
    let text = "ababababa";
    let z = z_algorithm(text);
    assert_eq!(z, &[9, 0, 7, 0, 5, 0, 3, 0, 1]);
}
289391
}

0 commit comments

Comments
 (0)