1
- fn sa_naive ( s : & [ i32 ] ) -> Vec < usize > {
1
+ fn sa_naive < T : Ord > ( s : & [ T ] ) -> Vec < usize > {
2
2
let n = s. len ( ) ;
3
3
let mut sa: Vec < usize > = ( 0 ..n) . collect ( ) ;
4
4
sa. sort_by ( |& ( mut l) , & ( mut r) | {
@@ -52,16 +52,173 @@ fn sa_doubling(s: &[i32]) -> Vec<usize> {
52
52
sa
53
53
}
54
54
55
- fn sa_is ( s : & [ i32 ] , upper : i32 ) -> Vec < usize > {
56
- sa_doubling ( s)
55
/// Compile-time size cut-offs consulted by `sa_is` when choosing an algorithm.
///
/// Implementors are used purely as type parameters; no value is ever constructed.
trait Threshold {
    /// Inputs shorter than this are handed to `sa_naive`.
    fn threshold_naive() -> usize;
    /// Inputs shorter than this (but not shorter than the naive cut-off) are handed to
    /// `sa_doubling`.
    fn threshold_doubling() -> usize;
}

/// Cut-offs used by the public entry points: naive below 10, doubling below 40.
///
/// An uninhabited enum: it only exists to be named as a type argument.
enum DefaultThreshold {}

impl Threshold for DefaultThreshold {
    fn threshold_naive() -> usize {
        10
    }

    fn threshold_doubling() -> usize {
        40
    }
}
69
+
70
+ fn sa_is < T : Threshold > ( s : & [ usize ] , upper : usize ) -> Vec < usize > {
71
+ let n = s. len ( ) ;
72
+ match n {
73
+ 0 => return vec ! [ ] ,
74
+ 1 => return vec ! [ 0 ] ,
75
+ 2 => return if s[ 0 ] < s[ 1 ] { vec ! [ 0 , 1 ] } else { vec ! [ 1 , 0 ] } ,
76
+ _ => ( ) ,
77
+ }
78
+ if n < T :: threshold_naive ( ) {
79
+ return sa_naive ( s) ;
80
+ }
81
+ if n < T :: threshold_doubling ( ) {
82
+ let s: Vec < i32 > = s. iter ( ) . map ( |& x| x as i32 ) . collect ( ) ;
83
+ return sa_doubling ( & s) ;
84
+ }
85
+ let mut sa = vec ! [ 0 ; n] ;
86
+ let mut ls = vec ! [ false ; n] ;
87
+ for i in ( 0 ..n - 1 ) . rev ( ) {
88
+ ls[ i] = if s[ i] == s[ i + 1 ] {
89
+ ls[ i + 1 ]
90
+ } else {
91
+ s[ i] < s[ i + 1 ]
92
+ } ;
93
+ }
94
+ let mut sum_l = vec ! [ 0 ; upper + 1 ] ;
95
+ let mut sum_s = vec ! [ 0 ; upper + 1 ] ;
96
+ for i in 0 ..n {
97
+ if !ls[ i] {
98
+ sum_s[ s[ i] ] += 1 ;
99
+ } else {
100
+ sum_l[ s[ i] + 1 ] += 1 ;
101
+ }
102
+ }
103
+ for i in 0 ..=upper {
104
+ sum_s[ i] += sum_l[ i] ;
105
+ if i < upper {
106
+ sum_l[ i + 1 ] += sum_s[ i] ;
107
+ }
108
+ }
109
+
110
+ // sa's origin is 1.
111
+ let induce = |sa : & mut [ usize ] , lms : & [ usize ] | {
112
+ for elem in sa. iter_mut ( ) {
113
+ * elem = 0 ;
114
+ }
115
+ let mut buf = sum_s. clone ( ) ;
116
+ for & d in lms {
117
+ if d == n {
118
+ continue ;
119
+ }
120
+ let old = buf[ s[ d] ] ;
121
+ buf[ s[ d] ] += 1 ;
122
+ sa[ old] = d + 1 ;
123
+ }
124
+ buf. copy_from_slice ( & sum_l) ;
125
+ let old = buf[ s[ n - 1 ] ] ;
126
+ buf[ s[ n - 1 ] ] += 1 ;
127
+ sa[ old] = n;
128
+ for i in 0 ..n {
129
+ let v = sa[ i] ;
130
+ if v >= 2 && !ls[ v - 2 ] {
131
+ let old = buf[ s[ v - 2 ] ] ;
132
+ buf[ s[ v - 2 ] ] += 1 ;
133
+ sa[ old] = v - 1 ;
134
+ }
135
+ }
136
+ buf. copy_from_slice ( & sum_l) ;
137
+ for i in ( 0 ..n) . rev ( ) {
138
+ let v = sa[ i] ;
139
+ if v >= 2 && ls[ v - 2 ] {
140
+ buf[ s[ v - 2 ] + 1 ] -= 1 ;
141
+ sa[ buf[ s[ v - 2 ] + 1 ] ] = v - 1 ;
142
+ }
143
+ }
144
+ } ;
145
+ // origin: 1
146
+ let mut lms_map = vec ! [ 0 ; n + 1 ] ;
147
+ let mut m = 0 ;
148
+ for i in 1 ..n {
149
+ if !ls[ i - 1 ] && ls[ i] {
150
+ lms_map[ i] = m + 1 ;
151
+ m += 1 ;
152
+ }
153
+ }
154
+ let mut lms = Vec :: with_capacity ( m) ;
155
+ for i in 1 ..n {
156
+ if !ls[ i - 1 ] && ls[ i] {
157
+ lms. push ( i) ;
158
+ }
159
+ }
160
+ assert_eq ! ( lms. len( ) , m) ;
161
+ induce ( & mut sa, & lms) ;
162
+
163
+ if m > 0 {
164
+ let mut sorted_lms = Vec :: with_capacity ( m) ;
165
+ for & v in & sa {
166
+ if lms_map[ v - 1 ] != 0 {
167
+ sorted_lms. push ( v - 1 ) ;
168
+ }
169
+ }
170
+ let mut rec_s = vec ! [ 0 ; m] ;
171
+ let mut rec_upper = 0 ;
172
+ rec_s[ lms_map[ sorted_lms[ 0 ] ] - 1 ] = 0 ;
173
+ for i in 1 ..m {
174
+ let mut l = sorted_lms[ i - 1 ] ;
175
+ let mut r = sorted_lms[ i] ;
176
+ let end_l = if lms_map[ l] < m { lms[ lms_map[ l] ] } else { n } ;
177
+ let end_r = if lms_map[ r] < m { lms[ lms_map[ r] ] } else { n } ;
178
+ let mut same = true ;
179
+ if end_l - l != end_r - r {
180
+ same = false ;
181
+ } else {
182
+ while l < end_l {
183
+ if s[ l] != s[ r] {
184
+ break ;
185
+ }
186
+ l += 1 ;
187
+ r += 1 ;
188
+ }
189
+ if l == n || s[ l] != s[ r] {
190
+ same = false ;
191
+ }
192
+ }
193
+ if !same {
194
+ rec_upper += 1 ;
195
+ }
196
+ rec_s[ lms_map[ sorted_lms[ i] ] - 1 ] = rec_upper;
197
+ }
198
+
199
+ let rec_sa = sa_is :: < T > ( & rec_s, rec_upper) ;
200
+ for i in 0 ..m {
201
+ sorted_lms[ i] = lms[ rec_sa[ i] ] ;
202
+ }
203
+ induce ( & mut sa, & mut sorted_lms) ;
204
+ }
205
+ for i in 0 ..n {
206
+ sa[ i] -= 1 ;
207
+ }
208
+ sa
209
+ }
210
+
211
+ fn sa_is_i32 < T : Threshold > ( s : & [ i32 ] , upper : i32 ) -> Vec < usize > {
212
+ let s: Vec < usize > = s. iter ( ) . map ( |& x| x as usize ) . collect ( ) ;
213
+ sa_is :: < T > ( & s, upper as usize )
57
214
}
58
215
59
216
pub fn suffix_array_manual ( s : & [ i32 ] , upper : i32 ) -> Vec < usize > {
60
217
assert ! ( upper >= 0 ) ;
61
218
for & elem in s {
62
219
assert ! ( 0 <= elem && elem <= upper) ;
63
220
}
64
- sa_is ( s, upper)
221
+ sa_is_i32 :: < DefaultThreshold > ( s, upper)
65
222
}
66
223
67
224
pub fn suffix_array_arbitrary < T : Ord > ( s : & [ T ] ) -> Vec < usize > {
@@ -76,18 +233,41 @@ pub fn suffix_array_arbitrary<T: Ord>(s: &[T]) -> Vec<usize> {
76
233
}
77
234
s2[ idx[ i] ] = now;
78
235
}
79
- sa_is ( & s2, now)
236
+ sa_is_i32 :: < DefaultThreshold > ( & s2, now)
80
237
}
81
238
82
239
pub fn suffix_array ( s : impl IntoIterator < Item = char > ) -> Vec < usize > {
83
- let mut s2: Vec < i32 > = s. into_iter ( ) . map ( |x| x as i32 ) . collect ( ) ;
84
- sa_is ( & s2, 255 )
240
+ let s2: Vec < usize > = s. into_iter ( ) . map ( |x| x as usize ) . collect ( ) ;
241
+ sa_is :: < DefaultThreshold > ( & s2, 255 )
85
242
}
86
243
87
244
#[ cfg( test) ]
88
245
mod tests {
89
246
use super :: * ;
90
247
248
+ enum ZeroThreshold { }
249
+ impl Threshold for ZeroThreshold {
250
+ fn threshold_naive ( ) -> usize {
251
+ 0
252
+ }
253
+ fn threshold_doubling ( ) -> usize {
254
+ 0
255
+ }
256
+ }
257
+
258
+ fn verify_all ( str : & str , expected_array : & [ usize ] ) {
259
+ let array: Vec < i32 > = str. bytes ( ) . map ( |x| x as i32 ) . collect ( ) ;
260
+ let sa = sa_doubling ( & array) ;
261
+ assert_eq ! ( sa, expected_array) ;
262
+ let sa_naive = sa_naive ( & array) ;
263
+ assert_eq ! ( sa_naive, expected_array) ;
264
+ let sa_is = sa_is_i32 :: < ZeroThreshold > ( & array, 255 ) ;
265
+ assert_eq ! ( sa_is, expected_array) ;
266
+
267
+ let sa_str = suffix_array ( str. chars ( ) ) ;
268
+ assert_eq ! ( sa_str, expected_array) ;
269
+ }
270
+
91
271
#[ test]
92
272
fn test_sa_0 ( ) {
93
273
let array = vec ! [ 0 , 1 , 2 , 3 , 4 ] ;
@@ -98,15 +278,12 @@ mod tests {
98
278
// Every implementation must agree on the suffix array of "abracadabra".
#[test]
fn test_sa_1() {
    let str = "abracadabra";
    verify_all(str, &[10, 7, 0, 3, 5, 8, 1, 4, 6, 9, 2]);
}
108
283
109
- let sa_str = suffix_array ( str. chars ( ) ) ;
110
- assert_eq ! ( sa_str, sa) ;
284
// Every implementation must agree on the suffix array of "mmiissiissiippii".
#[test]
fn test_sa_2() {
    // An example taken from https://mametter.hatenablog.com/entry/20180130/p1
    let str = "mmiissiissiippii";
    verify_all(str, &[15, 14, 10, 6, 2, 11, 7, 3, 1, 0, 13, 12, 9, 5, 8, 4]);
}
112
289
}
0 commit comments