Skip to content

Commit 86ed9cc

Browse files
committed
Add sa_is
1 parent 26e4392 commit 86ed9cc

File tree

1 file changed

+193
-16
lines changed

1 file changed

+193
-16
lines changed

src/string.rs

Lines changed: 193 additions & 16 deletions
Original file line numberDiff line numberDiff line change
@@ -1,4 +1,4 @@
1-
fn sa_naive(s: &[i32]) -> Vec<usize> {
1+
fn sa_naive<T: Ord>(s: &[T]) -> Vec<usize> {
22
let n = s.len();
33
let mut sa: Vec<usize> = (0..n).collect();
44
sa.sort_by(|&(mut l), &(mut r)| {
@@ -52,16 +52,173 @@ fn sa_doubling(s: &[i32]) -> Vec<usize> {
5252
sa
5353
}
5454

55-
fn sa_is(s: &[i32], upper: i32) -> Vec<usize> {
56-
sa_doubling(s)
55+
/// Input-length cut-offs that decide which suffix-array routine `sa_is` runs.
///
/// Both values are associated functions without a receiver, so an implementor
/// is only ever used as a type parameter and never instantiated.
trait Threshold {
    /// Texts shorter than this length are handed to `sa_naive`.
    fn threshold_naive() -> usize;

    /// Texts shorter than this length (and not already handled by the naive
    /// cut-off) are handed to `sa_doubling`.
    fn threshold_doubling() -> usize;
}
59+
60+
enum DefaultThreshold {}
61+
impl Threshold for DefaultThreshold {
62+
fn threshold_naive() -> usize {
63+
10
64+
}
65+
fn threshold_doubling() -> usize {
66+
40
67+
}
68+
}
69+
70+
fn sa_is<T: Threshold>(s: &[usize], upper: usize) -> Vec<usize> {
71+
let n = s.len();
72+
match n {
73+
0 => return vec![],
74+
1 => return vec![0],
75+
2 => return if s[0] < s[1] { vec![0, 1] } else { vec![1, 0] },
76+
_ => (),
77+
}
78+
if n < T::threshold_naive() {
79+
return sa_naive(s);
80+
}
81+
if n < T::threshold_doubling() {
82+
let s: Vec<i32> = s.iter().map(|&x| x as i32).collect();
83+
return sa_doubling(&s);
84+
}
85+
let mut sa = vec![0; n];
86+
let mut ls = vec![false; n];
87+
for i in (0..n - 1).rev() {
88+
ls[i] = if s[i] == s[i + 1] {
89+
ls[i + 1]
90+
} else {
91+
s[i] < s[i + 1]
92+
};
93+
}
94+
let mut sum_l = vec![0; upper + 1];
95+
let mut sum_s = vec![0; upper + 1];
96+
for i in 0..n {
97+
if !ls[i] {
98+
sum_s[s[i]] += 1;
99+
} else {
100+
sum_l[s[i] + 1] += 1;
101+
}
102+
}
103+
for i in 0..=upper {
104+
sum_s[i] += sum_l[i];
105+
if i < upper {
106+
sum_l[i + 1] += sum_s[i];
107+
}
108+
}
109+
110+
// sa's origin is 1.
111+
let induce = |sa: &mut [usize], lms: &[usize]| {
112+
for elem in sa.iter_mut() {
113+
*elem = 0;
114+
}
115+
let mut buf = sum_s.clone();
116+
for &d in lms {
117+
if d == n {
118+
continue;
119+
}
120+
let old = buf[s[d]];
121+
buf[s[d]] += 1;
122+
sa[old] = d + 1;
123+
}
124+
buf.copy_from_slice(&sum_l);
125+
let old = buf[s[n - 1]];
126+
buf[s[n - 1]] += 1;
127+
sa[old] = n;
128+
for i in 0..n {
129+
let v = sa[i];
130+
if v >= 2 && !ls[v - 2] {
131+
let old = buf[s[v - 2]];
132+
buf[s[v - 2]] += 1;
133+
sa[old] = v - 1;
134+
}
135+
}
136+
buf.copy_from_slice(&sum_l);
137+
for i in (0..n).rev() {
138+
let v = sa[i];
139+
if v >= 2 && ls[v - 2] {
140+
buf[s[v - 2] + 1] -= 1;
141+
sa[buf[s[v - 2] + 1]] = v - 1;
142+
}
143+
}
144+
};
145+
// origin: 1
146+
let mut lms_map = vec![0; n + 1];
147+
let mut m = 0;
148+
for i in 1..n {
149+
if !ls[i - 1] && ls[i] {
150+
lms_map[i] = m + 1;
151+
m += 1;
152+
}
153+
}
154+
let mut lms = Vec::with_capacity(m);
155+
for i in 1..n {
156+
if !ls[i - 1] && ls[i] {
157+
lms.push(i);
158+
}
159+
}
160+
assert_eq!(lms.len(), m);
161+
induce(&mut sa, &lms);
162+
163+
if m > 0 {
164+
let mut sorted_lms = Vec::with_capacity(m);
165+
for &v in &sa {
166+
if lms_map[v - 1] != 0 {
167+
sorted_lms.push(v - 1);
168+
}
169+
}
170+
let mut rec_s = vec![0; m];
171+
let mut rec_upper = 0;
172+
rec_s[lms_map[sorted_lms[0]] - 1] = 0;
173+
for i in 1..m {
174+
let mut l = sorted_lms[i - 1];
175+
let mut r = sorted_lms[i];
176+
let end_l = if lms_map[l] < m { lms[lms_map[l]] } else { n };
177+
let end_r = if lms_map[r] < m { lms[lms_map[r]] } else { n };
178+
let mut same = true;
179+
if end_l - l != end_r - r {
180+
same = false;
181+
} else {
182+
while l < end_l {
183+
if s[l] != s[r] {
184+
break;
185+
}
186+
l += 1;
187+
r += 1;
188+
}
189+
if l == n || s[l] != s[r] {
190+
same = false;
191+
}
192+
}
193+
if !same {
194+
rec_upper += 1;
195+
}
196+
rec_s[lms_map[sorted_lms[i]] - 1] = rec_upper;
197+
}
198+
199+
let rec_sa = sa_is::<T>(&rec_s, rec_upper);
200+
for i in 0..m {
201+
sorted_lms[i] = lms[rec_sa[i]];
202+
}
203+
induce(&mut sa, &mut sorted_lms);
204+
}
205+
for i in 0..n {
206+
sa[i] -= 1;
207+
}
208+
sa
209+
}
210+
211+
fn sa_is_i32<T: Threshold>(s: &[i32], upper: i32) -> Vec<usize> {
212+
let s: Vec<usize> = s.iter().map(|&x| x as usize).collect();
213+
sa_is::<T>(&s, upper as usize)
57214
}
58215

59216
pub fn suffix_array_manual(s: &[i32], upper: i32) -> Vec<usize> {
60217
assert!(upper >= 0);
61218
for &elem in s {
62219
assert!(0 <= elem && elem <= upper);
63220
}
64-
sa_is(s, upper)
221+
sa_is_i32::<DefaultThreshold>(s, upper)
65222
}
66223

67224
pub fn suffix_array_arbitrary<T: Ord>(s: &[T]) -> Vec<usize> {
@@ -76,18 +233,41 @@ pub fn suffix_array_arbitrary<T: Ord>(s: &[T]) -> Vec<usize> {
76233
}
77234
s2[idx[i]] = now;
78235
}
79-
sa_is(&s2, now)
236+
sa_is_i32::<DefaultThreshold>(&s2, now)
80237
}
81238

82239
pub fn suffix_array(s: impl IntoIterator<Item = char>) -> Vec<usize> {
83-
let mut s2: Vec<i32> = s.into_iter().map(|x| x as i32).collect();
84-
sa_is(&s2, 255)
240+
let s2: Vec<usize> = s.into_iter().map(|x| x as usize).collect();
241+
sa_is::<DefaultThreshold>(&s2, 255)
85242
}
86243

87244
#[cfg(test)]
88245
mod tests {
89246
use super::*;
90247

248+
enum ZeroThreshold {}
249+
impl Threshold for ZeroThreshold {
250+
fn threshold_naive() -> usize {
251+
0
252+
}
253+
fn threshold_doubling() -> usize {
254+
0
255+
}
256+
}
257+
258+
fn verify_all(str: &str, expected_array: &[usize]) {
259+
let array: Vec<i32> = str.bytes().map(|x| x as i32).collect();
260+
let sa = sa_doubling(&array);
261+
assert_eq!(sa, expected_array);
262+
let sa_naive = sa_naive(&array);
263+
assert_eq!(sa_naive, expected_array);
264+
let sa_is = sa_is_i32::<ZeroThreshold>(&array, 255);
265+
assert_eq!(sa_is, expected_array);
266+
267+
let sa_str = suffix_array(str.chars());
268+
assert_eq!(sa_str, expected_array);
269+
}
270+
91271
#[test]
92272
fn test_sa_0() {
93273
let array = vec![0, 1, 2, 3, 4];
@@ -98,15 +278,12 @@ mod tests {
98278
/// Checks every suffix-array routine against the known answer for
/// "abracadabra".
#[test]
fn test_sa_1() {
    let expected: [usize; 11] = [10, 7, 0, 3, 5, 8, 1, 4, 6, 9, 2];
    verify_all("abracadabra", &expected);
}
108283

109-
let sa_str = suffix_array(str.chars());
110-
assert_eq!(sa_str, sa);
284+
/// Checks every suffix-array routine against the known answer for
/// "mmiissiissiippii".
#[test]
fn test_sa_2() {
    // an example taken from https://mametter.hatenablog.com/entry/20180130/p1
    let text = "mmiissiissiippii";
    let expected: [usize; 16] = [15, 14, 10, 6, 2, 11, 7, 3, 1, 0, 13, 12, 9, 5, 8, 4];
    verify_all(text, &expected);
}
112289
}

0 commit comments

Comments
 (0)