Skip to content

Commit 563a875

Browse files
committed
doc(mlcs): improved a lot
1 parent d108ff3 commit 563a875

File tree

1 file changed

+48
-24
lines changed

1 file changed

+48
-24
lines changed

src/string/multiple_longest_common_subsequence.rs

Lines changed: 48 additions & 24 deletions
Original file line numberDiff line numberDiff line change
@@ -4,7 +4,7 @@ use std::collections::HashMap;
44

55
const IMPOSSIBLE_NB: usize = 999_999_999_999;
66

7-
// saves all the ctx needed to perform the algo in one place
7+
// saves all the precalculations needed
88
struct Context {
99
alphabet: Vec<char>,
1010
chains: Vec<Vec<char>>,
@@ -26,7 +26,7 @@ impl Context {
2626

2727
let ms: Vec<Vec<Vec<u64>>> = matrices_score(&chains);
2828

29-
// an impossible to reach point, father of all
29+
// an impossible to reach point, father of all points
3030
let p0 = vec![IMPOSSIBLE_NB; d];
3131

3232
let mut parents: HashMap<_, Option<Vec<usize>>> = HashMap::new();
@@ -70,12 +70,15 @@ fn common_seq(ctx: &Context, p: &Vec<usize>) -> String {
7070
common_sequence.iter().rev().collect::<String>()
7171
}
7272

73-
// given the list of strings, finds the minimal alphabet
74-
// @detail finds the shortest string
75-
// gets his alphabet
73+
/// Heuristic to find the smallest common alphabet among the strings
74+
/// gets the shortest string and removes its duplicate characters
75+
///
76+
/// # Arguments
77+
/// # 'chains' The strings among which the MLCS is searched
78+
///
79+
/// # Returns
80+
/// A vector of the characters making up that alphabet
7681
fn get_alphabet(chains: &[Vec<char>]) -> Vec<char> {
77-
// OPTI comment
78-
// use hashmap to keep track of inserted values
7982
let mut alphabet: Vec<char> = chains
8083
.iter()
8184
.min_by_key(|s| s.len())
@@ -89,33 +92,39 @@ fn get_alphabet(chains: &[Vec<char>]) -> Vec<char> {
8992

9093
/// Computes the starting points of the search (see `init_queue`)
9194
fn get_starting_p(ctx: &Context) -> Vec<Vec<usize>> {
92-
// OPTI : we may be passing the alphabet param directly as an iterator
9395
let mut successors: Vec<Vec<usize>> = vec![];
9496

95-
// for all alphabet letters
97+
// for each alphabet letter, finds the next match
98+
// meaning a point where all strings share the same character
99+
// example: In ["AB", "BC", "CB", "BF"],
100+
// A match for the letter B would be p = (1, 0, 1, 0)
96101
for (ch_idx, _) in ctx.alphabet.iter().enumerate() {
97102
// for each string, finds the next position of that letter
98103
let mut succ: Vec<usize> = vec![];
99104
for i in 0..(ctx.chains.len()) {
105+
// gets the next position of the current letter
100106
let next_ch_idx = ctx.mt[ch_idx][i][0];
101107
succ.push(next_ch_idx);
102108
}
103109

110+
// once the vector is complete, we add it to the successors
104111
successors.push(succ);
105112
}
106113

107114
successors
108115
}
109116

110117
/// Finds all successors of the point p
118+
/// A successor of p = (p_1, p_2, etc, p_n) is a point q = (q_1, q_2, etc, q_n)
119+
/// such that q_1 > p_1, q_2 > p_2, etc, q_n > p_n
111120
/// [Documentation](https://github.com/epita-rs/MLCS/blob/main/paper.pdf)
112121
///
113122
/// # Arguments
114123
/// # 'ctx' A struct containing the precalculated information
115-
/// # 'p' a vector
124+
/// # 'p' The point under examination
116125
///
117126
/// # Returns
118-
/// An array of vectors
127+
/// An array of the successors
119128
fn get_successors(ctx: &Context, p: &[usize]) -> Vec<Vec<usize>> {
120129
let mut successors: Vec<Vec<usize>> = vec![];
121130

@@ -125,16 +134,19 @@ fn get_successors(ctx: &Context, p: &[usize]) -> Vec<Vec<usize>> {
125134
let mut succ: Vec<usize> = vec![];
126135
for (i, p_ith_elt) in p.iter().enumerate().take(ctx.chains.len()) {
127136
let next_ch_idx = ctx.mt[ch_idx][i][p_ith_elt + 1];
137+
// in case the letter is not reachable in the string
128138
if next_ch_idx == IMPOSSIBLE_NB {
129139
break;
130140
}
131141

132142
succ.push(next_ch_idx);
133143
}
134144

145+
// the vector is complete, hence we add it to the successors
135146
if succ.len() == ctx.chains.len() {
136147
successors.push(succ);
137148
}
149+
// else we discard it and move on to the next letter
138150
}
139151
successors
140152
}
@@ -155,15 +167,14 @@ fn heuristic(ctx: &Context, p: &[usize]) -> u64 {
155167
*similarity.iter().min().unwrap()
156168
}
157169

158-
/// Runs the successors a first time
170+
/// Adds the first matches to the queue
159171
/// For each starting point found, sets an impossible point as parent
160172
/// [Documentation](https://github.com/epita-rs/MLCS/blob/main/paper.pdf)
161173
///
162174
/// # Arguments
163175
///
164176
/// * `ctx` - A structure containing information
165177
/// * `queue` - The priority queue of points
166-
///
167178
fn init_queue(ctx: &mut Context, queue: &mut Vec<Vec<usize>>) {
168179
*queue = get_starting_p(ctx);
169180

@@ -173,16 +184,14 @@ fn init_queue(ctx: &mut Context, queue: &mut Vec<Vec<usize>>) {
173184
reorder_queue(ctx, queue);
174185
}
175186

176-
/// Computes the suffix tables used for the MLCS-Astar
177-
/// (Multiple-Longest-Common-Substring) matching algorithm.
187+
/// Computes the suffix tables between each pair of strings
188+
/// used by the MLCS-Astar heuristic function
178189
/// [Documentation](https://github.com/epita-rs/MLCS/blob/main/paper.pdf)
179190
///
180191
/// # Arguments
181192
///
182-
/// * `chains` - A slice of collected strings from which the suffix table is computed.
183-
///
184-
/// # Returns
185-
///
193+
/// * `chains` - A slice of collected strings
194+
///   from which the suffix tables are computed.
186195
fn matrices_score(chains: &[Vec<char>]) -> Vec<Vec<Vec<u64>>> {
187196
let mut scores: Vec<Vec<Vec<u64>>> = vec![];
188197
for s1 in chains.iter() {
@@ -194,8 +203,8 @@ fn matrices_score(chains: &[Vec<char>]) -> Vec<Vec<Vec<u64>>> {
194203
scores
195204
}
196205

197-
/// Builds the mt table used for accessing the index of the next char
198-
/// updates the common alphabet at the same time
206+
/// Builds the lookup table used for accessing the index of the next char
207+
/// updates the alphabet to be the alphabet of the letters common to all strings
199208
///
200209
/// # Arguments
201210
/// # 'chains' the strings as a matrix of char
@@ -215,24 +224,39 @@ fn mt_table(chains: &Vec<Vec<char>>, alphabet: &mut Vec<char>) -> Vec<Vec<Vec<us
215224
let mut v: Vec<usize> = vec![IMPOSSIBLE_NB; s.len()];
216225
let mut lpos = IMPOSSIBLE_NB;
217226

227+
// iterating backwards on the string
218228
for i in (0..(s.len())).rev() {
219229
if s[i] == ch {
220230
lpos = i;
221231
}
222-
232+
// storing the index of the nearest occurrence of the current letter at or after i
223233
v[i] = lpos;
224234
}
225235

226236
chain.push(v);
227237

238+
// if the letter was never seen in the current string
239+
// then it can't be part of the common alphabet
228240
if lpos == IMPOSSIBLE_NB {
241+
// removing that letter
229242
alphabet.retain(|&x| x != ch);
230243
chain = vec![];
231244
break;
232245
}
233246
}
234247

248+
// the letter was seen at least once in every string
235249
if !chain.is_empty() {
250+
// pushing an array of arrays: for this letter, one table of next positions per string
251+
// example on ["AB", "ABAA"]
252+
// string1 => {
253+
// 'A' => {0, IMPOSSIBLE_NB}
254+
// 'B' => {1, 1}
255+
// }
256+
// string2 => {
257+
// 'A' => {0, 2, 2, 3}
258+
// 'B' => {1, 1, IMPOSSIBLE_NB, IMPOSSIBLE_NB}
259+
// }
236260
mt.push(chain);
237261
}
238262
}
@@ -294,6 +318,7 @@ pub fn multiple_longest_common_subsequence(chains: &Vec<&str>) -> String {
294318
}
295319
}
296320
}
321+
// sorting the queue
297322
reorder_queue(&ctx, &mut queue);
298323
}
299324
String::from("")
@@ -312,8 +337,7 @@ fn reorder_queue(ctx: &Context, queue: &mut [Vec<usize>]) {
312337
});
313338
}
314339

315-
// given two strings s1 and s2 we compute the score matrix
316-
// @return matrix of size (m + 1) (n + 1)
340+
/// Computes the suffix table
317341
fn score_matrix(s1: &[char], s2: &[char]) -> Vec<Vec<u64>> {
318342
let m = s1.len();
319343
let n = s2.len();

0 commit comments

Comments
 (0)