@@ -2,10 +2,8 @@ use std::cmp::max;
22use std:: cmp:: Ordering ;
33use std:: collections:: HashMap ;
44
5- const IMPOSSIBLE_NB : usize = 999_999_999_999 ;
5+ const IMPOSSIBLE_NB : usize = usize :: MAX ;
66
7- // saves the precalculations
8- // will be moved around a lot
97// alphabet : the common alphabet
108// chains : the strings among which the common subsequence is
119// d : the number of strings
@@ -60,142 +58,172 @@ impl Context {
6058 parents,
6159 }
6260 }
63- }
6461
65- // ascend back up the parent tree to form the common subsequence
66- fn common_seq ( ctx : & Context , p : & Vec < usize > ) -> String {
67- let ref_str: & Vec < char > = & ctx. chains [ 0 ] ;
68- let mut common_subsequence: Vec < char > = vec ! [ ] ;
69- // Gaining mutability
70- let mut p = p;
62+ // given a point p and its successor q, computes the necessary information
63+ // point p is marked PARENT of q
64+ pub fn update_suc ( & mut self , p : Vec < usize > , q : Vec < usize > ) {
65+ // g(q) = g(p) + 1
66+ let nb = & self . g [ & p] + 1 ;
67+ self . g . insert ( q. clone ( ) , nb) ;
68+ // saves the cost function for point q : f(q) = h(q) + g(q)
69+ self . f . insert ( q. clone ( ) , self . heuristic ( & q) + nb) ;
70+ // saves the fact that p is the parent of q
71+ self . parents . insert ( q, Some ( p) ) ;
72+ }
73+
74+ /// Finds all successors of the point p
75+ /// A successor of p = (p_1, p_2, etc, p_n) is a point q = (q_1, q_2, etc, q_n)
76+ /// such that q_1 > p_1, q_2 > p_2, etc, q_n > p_n
77+ /// [Documentation](https://github.com/epita-rs/MLCS/blob/main/doc/paper.pdf)
78+ ///
79+ /// # Arguments
80+ /// # 'Context' A struct containing informations
81+ /// # 'p' The point under examination
82+ ///
83+ /// # Returns
84+ /// An array of the successors
85+ pub fn get_successors ( & self , p : & [ usize ] ) -> Vec < Vec < usize > > {
86+ let mut successors: Vec < Vec < usize > > = vec ! [ ] ;
87+
88+ // for all alphabet letters
89+ for ( ch_idx, _) in self . alphabet . iter ( ) . enumerate ( ) {
90+ // for each string, finds the next position of that letter
91+ let mut succ: Vec < usize > = vec ! [ ] ;
92+ for ( i, p_ith_elt) in p. iter ( ) . enumerate ( ) . take ( self . chains . len ( ) ) {
93+ let next_ch_idx = self . mt [ ch_idx] [ i] [ p_ith_elt + 1 ] ;
94+ // in case the letter is not reachable in the string
95+ if next_ch_idx == IMPOSSIBLE_NB {
96+ break ;
97+ }
7198
72- while ctx . parents [ p ] . is_some ( ) {
73- common_subsequence . push ( ref_str [ p [ 0 ] ] ) ;
99+ succ . push ( next_ch_idx ) ;
100+ }
74101
75- // getting the parent of current point
76- p = ctx. parents [ p] . as_ref ( ) . unwrap ( ) ;
102+ // the vector is complete, hence we add it to the successors
103+ if succ. len ( ) == self . chains . len ( ) {
104+ successors. push ( succ) ;
105+ }
106+ // else we discard it and move on to the next letter
107+ }
108+ successors
77109 }
78110
79- common_subsequence. iter ( ) . rev ( ) . collect :: < String > ( )
80- }
111+ // ascend back up the parent tree to form the common subsequence
112+ fn common_seq ( & self , p : & Vec < usize > ) -> String {
113+ let ref_str: & Vec < char > = & self . chains [ 0 ] ;
114+ let mut common_subsequence: Vec < char > = vec ! [ ] ;
115+ // Gaining mutability
116+ let mut p = p;
81117
82- /// Heuristic to find the smallest common alphabet among the strings
83- /// gets the shortest string and remove duplicates
84- ///
85- /// # Arguments
86- /// # 'chains' The strings among wich the mlcs is
87- ///
88- /// # Returns
89- /// A vector
90- fn get_alphabet ( chains : & [ Vec < char > ] ) -> Vec < char > {
91- let mut alphabet: Vec < char > = chains
92- . iter ( )
93- . min_by_key ( |s| s. len ( ) )
94- . expect ( "No minimum found" )
95- . to_vec ( ) ;
96- alphabet. sort ( ) ;
97- alphabet. dedup ( ) ;
98-
99- alphabet
100- }
118+ while self . parents [ p] . is_some ( ) {
119+ common_subsequence. push ( ref_str[ p[ 0 ] ] ) ;
101120
102- /// CF Initqueue
103- fn get_starting_p ( ctx : & Context ) -> Vec < Vec < usize > > {
104- let mut successors: Vec < Vec < usize > > = vec ! [ ] ;
105-
106- // for each alphabet letter, finds the next match
107- // meaning the a point where all strings share a character
108- // example: In ["AB", "BC", "CB", "BF"],
109- // A match for the letter B would be p = (1, 0, 1, 0)
110- for ( ch_idx, _) in ctx. alphabet . iter ( ) . enumerate ( ) {
111- // for each string, finds the next position of that letter
112- let mut succ: Vec < usize > = vec ! [ ] ;
113- for i in 0 ..( ctx. chains . len ( ) ) {
114- // gets the next position of the current letter
115- let next_ch_idx = ctx. mt [ ch_idx] [ i] [ 0 ] ;
116- succ. push ( next_ch_idx) ;
121+ // getting the parent of current point
122+ p = self . parents [ p] . as_ref ( ) . unwrap ( ) ;
117123 }
118124
119- // once the vector is complete, we add it to the successors
120- successors. push ( succ) ;
125+ common_subsequence. iter ( ) . rev ( ) . collect :: < String > ( )
121126 }
122127
123- successors
124- }
125-
126- /// Finds all succcesors of the point p
127- /// A successor of p = (p_1, p_2, etc, p_n) is a point q = (q_1, q_2, etc, q_n)
128- /// such that q_1 > p_1, q_2 > p_2, etc, q_n > p_n
129- /// [Documentation](https://github.com/epita-rs/MLCS/blob/main/paper.pdf)
130- ///
131- /// # Arguments
132- /// # 'Context' A struct containing informations
133- /// # 'p' The point under examination
134- ///
135- /// # Returns
136- /// An array of the successors
137- fn get_successors ( ctx : & Context , p : & [ usize ] ) -> Vec < Vec < usize > > {
138- let mut successors: Vec < Vec < usize > > = vec ! [ ] ;
139-
140- // for all alphabet letters
141- for ( ch_idx, _) in ctx. alphabet . iter ( ) . enumerate ( ) {
142- // for each string, finds the next position of that letter
143- let mut succ: Vec < usize > = vec ! [ ] ;
144- for ( i, p_ith_elt) in p. iter ( ) . enumerate ( ) . take ( ctx. chains . len ( ) ) {
145- let next_ch_idx = ctx. mt [ ch_idx] [ i] [ p_ith_elt + 1 ] ;
146- // in case the letter is not rechable in the string
147- if next_ch_idx == IMPOSSIBLE_NB {
148- break ;
128+ /// CF Initqueue
129+ fn get_starting_p ( & self ) -> Vec < Vec < usize > > {
130+ let mut successors: Vec < Vec < usize > > = vec ! [ ] ;
131+
132+ // for each alphabet letter, finds the next match
133+ // meaning a point where all strings share a character
134+ // example: In ["AB", "BC", "CB", "BF"],
135+ // A match for the letter B would be p = (1, 0, 1, 0)
136+ for ( ch_idx, _) in self . alphabet . iter ( ) . enumerate ( ) {
137+ // for each string, finds the next position of that letter
138+ let mut succ: Vec < usize > = vec ! [ ] ;
139+ for i in 0 ..( self . chains . len ( ) ) {
140+ // gets the next position of the current letter
141+ let next_ch_idx = self . mt [ ch_idx] [ i] [ 0 ] ;
142+ succ. push ( next_ch_idx) ;
149143 }
150144
151- succ. push ( next_ch_idx) ;
152- }
153-
154- // the vector is complete, hence we add it to the successors
155- if succ. len ( ) == ctx. chains . len ( ) {
145+ // once the vector is complete, we add it to the successors
156146 successors. push ( succ) ;
157147 }
158- // else we discard it and move on to the next letter
148+
149+ successors
159150 }
160- successors
161- }
162151
163- /// Computes the heuristic function given a point
164- /// min ( { M_ij[ p[i] ][ p[j] ] | (i,j) in [0 ; d] } )
165- /// [Documentation](https://github.com/epita-rs/MLCS/blob/main/paper.pdf)
166- fn heuristic ( ctx : & Context , p : & [ usize ] ) -> u64 {
167- let mut similarity: Vec < u64 > = vec ! [ ] ;
168- for i in 0 ..ctx. d {
169- for j in 0 ..ctx. d {
170- if i != j {
171- similarity. push ( ctx. ms [ translate ( i, j, ctx. d ) ] [ p[ i] ] [ p[ j] ] ) ;
152+ /// Computes the heuristic function given a point
153+ /// min ( { M_ij[ p[i] ][ p[j] ] | (i,j) in [0 ; d] } )
154+ /// [Documentation](https://github.com/epita-rs/MLCS/blob/main/doc/paper.pdf)
155+ fn heuristic ( & self , p : & [ usize ] ) -> u64 {
156+ let mut similarity: Vec < u64 > = vec ! [ ] ;
157+ for i in 0 ..self . d {
158+ for j in 0 ..self . d {
159+ if i != j {
160+ similarity. push ( self . ms [ to_linear_index ( i, j, self . d ) ] [ p[ i] ] [ p[ j] ] ) ;
161+ }
172162 }
173163 }
164+
165+ * similarity. iter ( ) . min ( ) . unwrap ( )
166+ }
167+
168+ /// Add the first matches to the queue
169+ /// For each starting point found, sets an impossible point as parent
170+ /// [Documentation](https://github.com/epita-rs/MLCS/blob/main/doc/paper.pdf)
171+ ///
172+ /// # Arguments
173+ ///
174+ /// * `self` - A structure containing information
175+ /// * 'queue' - The priority queue of points
176+ fn init_queue ( & mut self ) -> Vec < Vec < usize > > {
177+ let mut queue = self . get_starting_p ( ) ;
178+
179+ for q in queue. clone ( ) {
180+ self . update_suc ( vec ! [ IMPOSSIBLE_NB ; self . d] , q. clone ( ) ) ;
181+ }
182+
183+ self . reorder_queue ( & mut queue) ;
184+
185+ queue
186+ }
187+
188+ // sorts the queue
189+ fn reorder_queue ( & self , queue : & mut [ Vec < usize > ] ) {
190+ queue. sort_unstable_by ( |p, q| {
191+ if ( self . f . get ( p) > self . f . get ( q) )
192+ || ( self . f . get ( p) == self . f . get ( q) && self . heuristic ( p) > self . heuristic ( q) )
193+ {
194+ Ordering :: Greater
195+ } else {
196+ Ordering :: Less
197+ }
198+ } ) ;
174199 }
175200
176- * similarity. iter ( ) . min ( ) . unwrap ( )
177201}
178202
179- /// Add the first matches to the queue
180- /// For each starting point found, sets an impossible point as parent
181- /// [Documentation](https://github.com/epita-rs/MLCS/blob/main/paper.pdf)
203+
204+ /// Heuristic to find the smallest common alphabet among the strings
205+ /// gets the shortest string and remove duplicates
182206///
183207/// # Arguments
208+ /// # 'chains' The strings among which the MLCS is searched for
184209///
185- /// * `ctx' - A structure containing informations
186- /// * 'queue' - The priority queue of points
187- fn init_queue ( ctx : & mut Context , queue : & mut Vec < Vec < usize > > ) {
188- * queue = get_starting_p ( ctx) ;
210+ /// # Returns
211+ /// A vector
212+ fn get_alphabet ( chains : & [ Vec < char > ] ) -> Vec < char > {
213+ let mut alphabet: Vec < char > = chains
214+ . iter ( )
215+ . min_by_key ( |s| s. len ( ) )
216+ . expect ( "No minimum found" )
217+ . to_vec ( ) ;
218+ alphabet. sort ( ) ;
219+ alphabet. dedup ( ) ;
189220
190- for q in queue. clone ( ) {
191- update_suc ( ctx, vec ! [ IMPOSSIBLE_NB ; ctx. d] , q. clone ( ) ) ;
192- }
193- reorder_queue ( ctx, queue) ;
221+ alphabet
194222}
195223
196224/// Computes the suffix tables between each pair of string
197225/// used by the MLCS-Astar heuristic function
198- /// [Documentation](https://github.com/epita-rs/MLCS/blob/main/paper.pdf)
226+ /// [Documentation](https://github.com/epita-rs/MLCS/blob/main/doc/paper.pdf)
199227///
200228/// # Arguments
201229///
@@ -275,7 +303,7 @@ fn mt_table(chains: &Vec<Vec<char>>, alphabet: &mut Vec<char>) -> Vec<Vec<Vec<us
275303
276304/// Finds one of the longest_common_subsequence among multiple strings
277305/// using a similar approach to the A* algorithm in graph theory
278- /// [Documentation](https://github.com/epita-rs/MLCS/blob/main/paper.pdf)
306+ /// [Documentation](https://github.com/epita-rs/MLCS/blob/main/doc/paper.pdf)
279307/// # Arguments
280308///
281309/// * `S` - Array of strings.
@@ -291,8 +319,7 @@ pub fn multiple_longest_common_subsequence(chains: &Vec<&str>) -> String {
291319 let mut ctx = Context :: new ( chains) ;
292320
293321 // queue
294- let mut queue: Vec < Vec < usize > > = vec ! [ ] ;
295- init_queue ( & mut ctx, & mut queue) ;
322+ let mut queue: Vec < Vec < usize > > = ctx. init_queue ( ) ;
296323
297324 while !queue. is_empty ( ) {
298325 // y = max( {f(p) | p in queue} )
@@ -312,39 +339,27 @@ pub fn multiple_longest_common_subsequence(chains: &Vec<&str>) -> String {
312339 queue. clear ( ) ;
313340
314341 for p in second_queue {
315- if heuristic ( & ctx , & p) == 0 {
342+ if ctx . heuristic ( & p) == 0 {
316343 // An MLCS match was found
317- return common_seq ( & ctx , & p) ;
344+ return ctx . common_seq ( & p) ;
318345 }
319346 // inserting all succesors in the queue
320- let succs = get_successors ( & ctx , & p) ;
347+ let succs = ctx . get_successors ( & p) ;
321348 for q in succs {
322349 // basically saying if the queue does not already
323350 // contain the point q
324351 if !queue. contains ( & q) {
325- update_suc ( & mut ctx , p. clone ( ) , q. clone ( ) ) ;
352+ ctx . update_suc ( p. clone ( ) , q. clone ( ) ) ;
326353 queue. push ( q) ;
327354 }
328355 }
329356 }
330357 // sorting the queue
331- reorder_queue ( & ctx , & mut queue) ;
358+ ctx . reorder_queue ( & mut queue) ;
332359 }
333360 String :: from ( "" )
334361}
335362
336- // sorts the queue
337- fn reorder_queue ( ctx : & Context , queue : & mut [ Vec < usize > ] ) {
338- queue. sort_unstable_by ( |p, q| {
339- if ( ctx. f . get ( p) > ctx. f . get ( q) )
340- || ( ctx. f . get ( p) == ctx. f . get ( q) && heuristic ( ctx, p) > heuristic ( ctx, q) )
341- {
342- Ordering :: Greater
343- } else {
344- Ordering :: Less
345- }
346- } ) ;
347- }
348363
349364/// Computes the suffix table
350365fn score_matrix ( s1 : & [ char ] , s2 : & [ char ] ) -> Vec < Vec < u64 > > {
@@ -367,22 +382,11 @@ fn score_matrix(s1: &[char], s2: &[char]) -> Vec<Vec<u64>> {
367382 matrix
368383}
369384
370- //given given 2D coordinates, translates into 1D coordinates
371- fn translate ( i : usize , j : usize , d : usize ) -> usize {
385+ // given 2D coordinates (i, j), converts them into a 1D (linear) index
386+ fn to_linear_index ( i : usize , j : usize , d : usize ) -> usize {
372387 i * d + j
373388}
374389
375- // given a point p and his successor q, computes necessary informations
376- // point p is marked PARENT of q
377- fn update_suc ( ctx : & mut Context , p : Vec < usize > , q : Vec < usize > ) {
378- // g(q) = g(p) + 1
379- let nb = & ctx. g [ & p] + 1 ;
380- ctx. g . insert ( q. clone ( ) , nb) ;
381- // saves the cost function for point p : h(p) + g(p)
382- ctx. f . insert ( q. clone ( ) , heuristic ( ctx, & q) + nb) ;
383- // saves the fact that p is the parent of q
384- ctx. parents . insert ( q, Some ( p) ) ;
385- }
386390
387391#[ cfg( test) ]
388392mod tests {
0 commit comments