@@ -4,7 +4,7 @@ use std::collections::HashMap;
44
55const IMPOSSIBLE_NB : usize = 999_999_999_999 ;
66
7- // saves all the ctx needed to perform the algo in one place
7+ // saves all the precalculations needed
88struct Context {
99 alphabet : Vec < char > ,
1010 chains : Vec < Vec < char > > ,
@@ -26,7 +26,7 @@ impl Context {
2626
2727 let ms: Vec < Vec < Vec < u64 > > > = matrices_score ( & chains) ;
2828
29- // an impossible to reach point, father of all
29+ // an impossible to reach point, father of all points
3030 let p0 = vec ! [ IMPOSSIBLE_NB ; d] ;
3131
3232 let mut parents: HashMap < _ , Option < Vec < usize > > > = HashMap :: new ( ) ;
@@ -70,12 +70,15 @@ fn common_seq(ctx: &Context, p: &Vec<usize>) -> String {
7070 common_sequence. iter ( ) . rev ( ) . collect :: < String > ( )
7171}
7272
73- // given the list of strings, finds the minimal alphabet
74- // @detail finds the shortest string
75- // gets his alphabet
73+ /// Heuristic to find the smallest common alphabet among the strings
74+ /// gets the shortest string and removes duplicates
75+ ///
76+ /// # Arguments
77+ /// # 'chains' The strings among which the MLCS is searched
78+ ///
79+ /// # Returns
80+ /// A vector
7681fn get_alphabet ( chains : & [ Vec < char > ] ) -> Vec < char > {
77- // OPTI comment
78- // use hashmap to keep track of inserted values
7982 let mut alphabet: Vec < char > = chains
8083 . iter ( )
8184 . min_by_key ( |s| s. len ( ) )
@@ -89,33 +92,39 @@ fn get_alphabet(chains: &[Vec<char>]) -> Vec<char> {
8992
9093/// Builds the starting points used by `init_queue` (CF Initqueue): one point per alphabet letter, made of the `ctx.mt` entry at position 0 for every string
9194fn get_starting_p ( ctx : & Context ) -> Vec < Vec < usize > > {
92- // OPTI : we may be passing the alphabet param directly as an iterator
9395 let mut successors: Vec < Vec < usize > > = vec ! [ ] ;
9496
95- // for all alphabet letters
97+ // for each alphabet letter, finds the first match
98+ // meaning a point where all strings share the same character
99+ // example: In ["AB", "BC", "CB", "BF"],
100+ // A match for the letter B would be p = (1, 0, 1, 0)
96101 for ( ch_idx, _) in ctx. alphabet . iter ( ) . enumerate ( ) {
97102 // for each string, reads the lookup-table entry of that letter at position 0
98103 let mut succ: Vec < usize > = vec ! [ ] ;
99104 for i in 0 ..( ctx. chains . len ( ) ) {
105+ // gets the next position of the current letter in string i, looking from index 0
100106 let next_ch_idx = ctx. mt [ ch_idx] [ i] [ 0 ] ;
101107 succ. push ( next_ch_idx) ;
102108 }
103109
110+ // the vector now holds one coordinate per string, so we add it to the starting points
104111 successors. push ( succ) ;
105112 }
106113
107114 successors
108115}
109116
110117/// Finds all successors of the point p
118+ /// A successor of p = (p_1, p_2, etc, p_n) is a point q = (q_1, q_2, etc, q_n)
119+ /// such that q_1 > p_1, q_2 > p_2, etc, q_n > p_n
111120/// [Documentation](https://github.com/epita-rs/MLCS/blob/main/paper.pdf)
112121///
113122/// # Arguments
114123/// # 'Context' A struct containing informations
115- /// # 'p' a vector
124+ /// # 'p' The point under examination
116125///
117126/// # Returns
118- /// An array of vectors
127+ /// An array of the successors
119128fn get_successors ( ctx : & Context , p : & [ usize ] ) -> Vec < Vec < usize > > {
120129 let mut successors: Vec < Vec < usize > > = vec ! [ ] ;
121130
@@ -125,16 +134,19 @@ fn get_successors(ctx: &Context, p: &[usize]) -> Vec<Vec<usize>> {
125134 let mut succ: Vec < usize > = vec ! [ ] ;
126135 for ( i, p_ith_elt) in p. iter ( ) . enumerate ( ) . take ( ctx. chains . len ( ) ) {
127136 let next_ch_idx = ctx. mt [ ch_idx] [ i] [ p_ith_elt + 1 ] ;
137+ // in case the letter is not reachable in the string
128138 if next_ch_idx == IMPOSSIBLE_NB {
129139 break ;
130140 }
131141
132142 succ. push ( next_ch_idx) ;
133143 }
134144
145+ // the vector is complete, hence we add it to the successors
135146 if succ. len ( ) == ctx. chains . len ( ) {
136147 successors. push ( succ) ;
137148 }
149+ // else we discard it and move on to the next letter
138150 }
139151 successors
140152}
@@ -155,15 +167,14 @@ fn heuristic(ctx: &Context, p: &[usize]) -> u64 {
155167 * similarity. iter ( ) . min ( ) . unwrap ( )
156168}
157169
158- /// Runs the successors a first time
170+ /// Add the first matches to the queue
159171/// For each starting point found, sets an impossible point as parent
160172/// [Documentation](https://github.com/epita-rs/MLCS/blob/main/paper.pdf)
161173///
162174/// # Arguments
163175///
164176/// * `ctx` - A structure containing information
165177/// * 'queue' - The priority queue of points
166- ///
167178fn init_queue ( ctx : & mut Context , queue : & mut Vec < Vec < usize > > ) {
168179 * queue = get_starting_p ( ctx) ;
169180
@@ -173,16 +184,14 @@ fn init_queue(ctx: &mut Context, queue: &mut Vec<Vec<usize>>) {
173184 reorder_queue ( ctx, queue) ;
174185}
175186
176- /// Computes the suffix tables used for the MLCS-Astar
177- /// (Multiple-Longest-Common-Substring) matching algorithm.
187+ /// Computes the suffix tables between each pair of strings
188+ /// used by the MLCS-Astar heuristic function
178189/// [Documentation](https://github.com/epita-rs/MLCS/blob/main/paper.pdf)
179190///
180191/// # Arguments
181192///
182- /// * `chains` - A slice of collected strings from which the suffix table is computed.
183- ///
184- /// # Returns
185- ///
193+ /// * `chains` - A slice of collected strings
194+ /// - from which the suffix tables are computed.
186195fn matrices_score ( chains : & [ Vec < char > ] ) -> Vec < Vec < Vec < u64 > > > {
187196 let mut scores: Vec < Vec < Vec < u64 > > > = vec ! [ ] ;
188197 for s1 in chains. iter ( ) {
@@ -194,8 +203,8 @@ fn matrices_score(chains: &[Vec<char>]) -> Vec<Vec<Vec<u64>>> {
194203 scores
195204}
196205
197- /// Builds the mt table used for accessing the index of the next char
198- /// updates the common alphabet at the same time
206+ /// Builds the lookup table used for accessing the index of the next char
207+ /// updates the alphabet to be the alphabet of the letters common to all strings
199208///
200209/// # Arguments
201210/// # 'chains' the strings as a matrix of char
@@ -215,24 +224,39 @@ fn mt_table(chains: &Vec<Vec<char>>, alphabet: &mut Vec<char>) -> Vec<Vec<Vec<us
215224 let mut v: Vec < usize > = vec ! [ IMPOSSIBLE_NB ; s. len( ) ] ;
216225 let mut lpos = IMPOSSIBLE_NB ;
217226
227+ // iterating backwards on the string
218228 for i in ( 0 ..( s. len ( ) ) ) . rev ( ) {
219229 if s[ i] == ch {
220230 lpos = i;
221231 }
222-
232+ // pushing the index of the last encounter with the current letter
223233 v[ i] = lpos;
224234 }
225235
226236 chain. push ( v) ;
227237
238+ // if the letter was never seen in the current string
239+ // then it can't be part of the common alphabet
228240 if lpos == IMPOSSIBLE_NB {
241+ // removing that letter
229242 alphabet. retain ( |& x| x != ch) ;
230243 chain = vec ! [ ] ;
231244 break ;
232245 }
233246 }
234247
248+ // the letter was seen at least once in every string
235249 if !chain. is_empty ( ) {
250+ // pushing an array of arrays
251+ // example on ["AB", "ABAA"]
252+ // string1 => {
253+ // 'A' => {0, IMPOSSIBLE_NB}
254+ // 'B' => {1, 1}
255+ // }
256+ // string2 => {
257+ // 'A' => {0, 2, 2, 3}
258+ // 'B' => {1, 1, IMPOSSIBLE_NB, IMPOSSIBLE_NB}
259+ // }
236260 mt. push ( chain) ;
237261 }
238262 }
@@ -294,6 +318,7 @@ pub fn multiple_longest_common_subsequence(chains: &Vec<&str>) -> String {
294318 }
295319 }
296320 }
321+ // sorting the queue
297322 reorder_queue ( & ctx, & mut queue) ;
298323 }
299324 String :: from ( "" )
@@ -312,8 +337,7 @@ fn reorder_queue(ctx: &Context, queue: &mut [Vec<usize>]) {
312337 } ) ;
313338}
314339
315- // given two strings s1 and s2 we compute the score matrix
316- // @return matrix of size (m + 1) (n + 1)
340+ /// Computes the suffix table
317341fn score_matrix ( s1 : & [ char ] , s2 : & [ char ] ) -> Vec < Vec < u64 > > {
318342 let m = s1. len ( ) ;
319343 let n = s2. len ( ) ;
0 commit comments