Skip to content

Commit e028c5d

Browse files
committed
fix: PR changes
1 parent a5a0f3b commit e028c5d

File tree

1 file changed

+145
-141
lines changed

1 file changed

+145
-141
lines changed

src/string/multiple_longest_common_subsequence.rs

Lines changed: 145 additions & 141 deletions
Original file line numberDiff line numberDiff line change
@@ -2,10 +2,8 @@ use std::cmp::max;
22
use std::cmp::Ordering;
33
use std::collections::HashMap;
44

5-
const IMPOSSIBLE_NB: usize = 999_999_999_999;
5+
const IMPOSSIBLE_NB: usize = usize::MAX;
66

7-
// saves the precalculations
8-
// will be moved around a lot
97
// alphabet : the common alphabet
108
// chains : the strings among which the common subsequence is
119
// d : the number of strings
@@ -60,142 +58,172 @@ impl Context {
6058
parents,
6159
}
6260
}
63-
}
6461

65-
// ascend back up the parent tree to form the common subsequence
66-
fn common_seq(ctx: &Context, p: &Vec<usize>) -> String {
67-
let ref_str: &Vec<char> = &ctx.chains[0];
68-
let mut common_subsequence: Vec<char> = vec![];
69-
// Gaining mutability
70-
let mut p = p;
62+
// given a point p and its successor q, computes the necessary information
63+
// point p is marked PARENT of q
64+
pub fn update_suc(&mut self, p: Vec<usize>, q: Vec<usize>) {
65+
// g(q) = g(p) + 1
66+
let nb = &self.g[&p] + 1;
67+
self.g.insert(q.clone(), nb);
68+
// saves the cost function for point q : f(q) = h(q) + g(q)
69+
self.f.insert(q.clone(), self.heuristic(&q) + nb);
70+
// saves the fact that p is the parent of q
71+
self.parents.insert(q, Some(p));
72+
}
73+
74+
/// Finds all successors of the point p
75+
/// A successor of p = (p_1, p_2, etc, p_n) is a point q = (q_1, q_2, etc, q_n)
76+
/// such that q_1 > p_1, q_2 > p_2, etc, q_n > p_n
77+
/// [Documentation](https://github.com/epita-rs/MLCS/blob/main/doc/paper.pdf)
78+
///
79+
/// # Arguments
80+
/// # 'Context' A struct containing information
81+
/// # 'p' The point under examination
82+
///
83+
/// # Returns
84+
/// An array of the successors
85+
pub fn get_successors(&self, p: &[usize]) -> Vec<Vec<usize>> {
86+
let mut successors: Vec<Vec<usize>> = vec![];
87+
88+
// for all alphabet letters
89+
for (ch_idx, _) in self.alphabet.iter().enumerate() {
90+
// for each string, finds the next position of that letter
91+
let mut succ: Vec<usize> = vec![];
92+
for (i, p_ith_elt) in p.iter().enumerate().take(self.chains.len()) {
93+
let next_ch_idx = self.mt[ch_idx][i][p_ith_elt + 1];
94+
// in case the letter is not reachable in the string
95+
if next_ch_idx == IMPOSSIBLE_NB {
96+
break;
97+
}
7198

72-
while ctx.parents[p].is_some() {
73-
common_subsequence.push(ref_str[p[0]]);
99+
succ.push(next_ch_idx);
100+
}
74101

75-
// getting the parent of current point
76-
p = ctx.parents[p].as_ref().unwrap();
102+
// the vector is complete, hence we add it to the successors
103+
if succ.len() == self.chains.len() {
104+
successors.push(succ);
105+
}
106+
// else we discard it and move on to the next letter
107+
}
108+
successors
77109
}
78110

79-
common_subsequence.iter().rev().collect::<String>()
80-
}
111+
// ascend back up the parent tree to form the common subsequence
112+
fn common_seq(&self, p: &Vec<usize>) -> String {
113+
let ref_str: &Vec<char> = &self.chains[0];
114+
let mut common_subsequence: Vec<char> = vec![];
115+
// Gaining mutability
116+
let mut p = p;
81117

82-
/// Heuristic to find the smallest common alphabet among the strings
83-
/// gets the shortest string and remove duplicates
84-
///
85-
/// # Arguments
86-
/// # 'chains' The strings among wich the mlcs is
87-
///
88-
/// # Returns
89-
/// A vector
90-
fn get_alphabet(chains: &[Vec<char>]) -> Vec<char> {
91-
let mut alphabet: Vec<char> = chains
92-
.iter()
93-
.min_by_key(|s| s.len())
94-
.expect("No minimum found")
95-
.to_vec();
96-
alphabet.sort();
97-
alphabet.dedup();
98-
99-
alphabet
100-
}
118+
while self.parents[p].is_some() {
119+
common_subsequence.push(ref_str[p[0]]);
101120

102-
/// CF Initqueue
103-
fn get_starting_p(ctx: &Context) -> Vec<Vec<usize>> {
104-
let mut successors: Vec<Vec<usize>> = vec![];
105-
106-
// for each alphabet letter, finds the next match
107-
// meaning the a point where all strings share a character
108-
// example: In ["AB", "BC", "CB", "BF"],
109-
// A match for the letter B would be p = (1, 0, 1, 0)
110-
for (ch_idx, _) in ctx.alphabet.iter().enumerate() {
111-
// for each string, finds the next position of that letter
112-
let mut succ: Vec<usize> = vec![];
113-
for i in 0..(ctx.chains.len()) {
114-
// gets the next position of the current letter
115-
let next_ch_idx = ctx.mt[ch_idx][i][0];
116-
succ.push(next_ch_idx);
121+
// getting the parent of current point
122+
p = self.parents[p].as_ref().unwrap();
117123
}
118124

119-
// once the vector is complete, we add it to the successors
120-
successors.push(succ);
125+
common_subsequence.iter().rev().collect::<String>()
121126
}
122127

123-
successors
124-
}
125-
126-
/// Finds all succcesors of the point p
127-
/// A successor of p = (p_1, p_2, etc, p_n) is a point q = (q_1, q_2, etc, q_n)
128-
/// such that q_1 > p_1, q_2 > p_2, etc, q_n > p_n
129-
/// [Documentation](https://github.com/epita-rs/MLCS/blob/main/paper.pdf)
130-
///
131-
/// # Arguments
132-
/// # 'Context' A struct containing informations
133-
/// # 'p' The point under examination
134-
///
135-
/// # Returns
136-
/// An array of the successors
137-
fn get_successors(ctx: &Context, p: &[usize]) -> Vec<Vec<usize>> {
138-
let mut successors: Vec<Vec<usize>> = vec![];
139-
140-
// for all alphabet letters
141-
for (ch_idx, _) in ctx.alphabet.iter().enumerate() {
142-
// for each string, finds the next position of that letter
143-
let mut succ: Vec<usize> = vec![];
144-
for (i, p_ith_elt) in p.iter().enumerate().take(ctx.chains.len()) {
145-
let next_ch_idx = ctx.mt[ch_idx][i][p_ith_elt + 1];
146-
// in case the letter is not rechable in the string
147-
if next_ch_idx == IMPOSSIBLE_NB {
148-
break;
128+
/// CF Initqueue
129+
fn get_starting_p(&self) -> Vec<Vec<usize>> {
130+
let mut successors: Vec<Vec<usize>> = vec![];
131+
132+
// for each alphabet letter, finds the next match
133+
// meaning a point where all strings share a character
134+
// example: In ["AB", "BC", "CB", "BF"],
135+
// A match for the letter B would be p = (1, 0, 1, 0)
136+
for (ch_idx, _) in self.alphabet.iter().enumerate() {
137+
// for each string, finds the next position of that letter
138+
let mut succ: Vec<usize> = vec![];
139+
for i in 0..(self.chains.len()) {
140+
// gets the next position of the current letter
141+
let next_ch_idx = self.mt[ch_idx][i][0];
142+
succ.push(next_ch_idx);
149143
}
150144

151-
succ.push(next_ch_idx);
152-
}
153-
154-
// the vector is complete, hence we add it to the successors
155-
if succ.len() == ctx.chains.len() {
145+
// once the vector is complete, we add it to the successors
156146
successors.push(succ);
157147
}
158-
// else we discard it and move on to the next letter
148+
149+
successors
159150
}
160-
successors
161-
}
162151

163-
/// Computes the heuristic function given a point
164-
/// min ( { M_ij[ p[i] ][ p[j] ] | (i,j) in [0 ; d] } )
165-
/// [Documentation](https://github.com/epita-rs/MLCS/blob/main/paper.pdf)
166-
fn heuristic(ctx: &Context, p: &[usize]) -> u64 {
167-
let mut similarity: Vec<u64> = vec![];
168-
for i in 0..ctx.d {
169-
for j in 0..ctx.d {
170-
if i != j {
171-
similarity.push(ctx.ms[translate(i, j, ctx.d)][p[i]][p[j]]);
152+
/// Computes the heuristic function given a point
153+
/// min ( { M_ij[ p[i] ][ p[j] ] | (i,j) in [0 ; d] } )
154+
/// [Documentation](https://github.com/epita-rs/MLCS/blob/main/doc/paper.pdf)
155+
fn heuristic(&self, p: &[usize]) -> u64 {
156+
let mut similarity: Vec<u64> = vec![];
157+
for i in 0..self.d {
158+
for j in 0..self.d {
159+
if i != j {
160+
similarity.push(self.ms[to_linear_index(i, j, self.d)][p[i]][p[j]]);
161+
}
172162
}
173163
}
164+
165+
*similarity.iter().min().unwrap()
166+
}
167+
168+
/// Add the first matches to the queue
169+
/// For each starting point found, sets an impossible point as parent
170+
/// [Documentation](https://github.com/epita-rs/MLCS/blob/main/doc/paper.pdf)
171+
///
172+
/// # Arguments
173+
///
174+
/// * `self` - A structure containing information
175+
/// * 'queue' - The priority queue of points
176+
fn init_queue(&mut self) -> Vec<Vec<usize>> {
177+
let mut queue = self.get_starting_p();
178+
179+
for q in queue.clone() {
180+
self.update_suc(vec![IMPOSSIBLE_NB; self.d], q.clone());
181+
}
182+
183+
self.reorder_queue(&mut queue);
184+
185+
queue
186+
}
187+
188+
// sorts the queue
189+
fn reorder_queue(&self, queue: &mut [Vec<usize>]) {
190+
queue.sort_unstable_by(|p, q| {
191+
if (self.f.get(p) > self.f.get(q))
192+
|| (self.f.get(p) == self.f.get(q) && self.heuristic(p) > self.heuristic(q))
193+
{
194+
Ordering::Greater
195+
} else {
196+
Ordering::Less
197+
}
198+
});
174199
}
175200

176-
*similarity.iter().min().unwrap()
177201
}
178202

179-
/// Add the first matches to the queue
180-
/// For each starting point found, sets an impossible point as parent
181-
/// [Documentation](https://github.com/epita-rs/MLCS/blob/main/paper.pdf)
203+
204+
/// Heuristic to find the smallest common alphabet among the strings
205+
/// gets the shortest string and removes duplicates
182206
///
183207
/// # Arguments
208+
/// # 'chains' The strings among which the MLCS is searched
184209
///
185-
/// * `ctx' - A structure containing informations
186-
/// * 'queue' - The priority queue of points
187-
fn init_queue(ctx: &mut Context, queue: &mut Vec<Vec<usize>>) {
188-
*queue = get_starting_p(ctx);
210+
/// # Returns
211+
/// A vector
212+
fn get_alphabet(chains: &[Vec<char>]) -> Vec<char> {
213+
let mut alphabet: Vec<char> = chains
214+
.iter()
215+
.min_by_key(|s| s.len())
216+
.expect("No minimum found")
217+
.to_vec();
218+
alphabet.sort();
219+
alphabet.dedup();
189220

190-
for q in queue.clone() {
191-
update_suc(ctx, vec![IMPOSSIBLE_NB; ctx.d], q.clone());
192-
}
193-
reorder_queue(ctx, queue);
221+
alphabet
194222
}
195223

196224
/// Computes the suffix tables between each pair of string
197225
/// used by the MLCS-Astar heuristic function
198-
/// [Documentation](https://github.com/epita-rs/MLCS/blob/main/paper.pdf)
226+
/// [Documentation](https://github.com/epita-rs/MLCS/blob/main/doc/paper.pdf)
199227
///
200228
/// # Arguments
201229
///
@@ -275,7 +303,7 @@ fn mt_table(chains: &Vec<Vec<char>>, alphabet: &mut Vec<char>) -> Vec<Vec<Vec<us
275303

276304
/// Finds one of the longest_common_subsequence among multiple strings
277305
/// using a similar approach to the A* algorithm in graph theory
278-
/// [Documentation](https://github.com/epita-rs/MLCS/blob/main/paper.pdf)
306+
/// [Documentation](https://github.com/epita-rs/MLCS/blob/main/doc/paper.pdf)
279307
/// # Arguments
280308
///
281309
/// * `S` - Array of strings.
@@ -291,8 +319,7 @@ pub fn multiple_longest_common_subsequence(chains: &Vec<&str>) -> String {
291319
let mut ctx = Context::new(chains);
292320

293321
// queue
294-
let mut queue: Vec<Vec<usize>> = vec![];
295-
init_queue(&mut ctx, &mut queue);
322+
let mut queue: Vec<Vec<usize>> = ctx.init_queue();
296323

297324
while !queue.is_empty() {
298325
// y = max( {f(p) | p in queue} )
@@ -312,39 +339,27 @@ pub fn multiple_longest_common_subsequence(chains: &Vec<&str>) -> String {
312339
queue.clear();
313340

314341
for p in second_queue {
315-
if heuristic(&ctx, &p) == 0 {
342+
if ctx.heuristic(&p) == 0 {
316343
// An MLCS match was found
317-
return common_seq(&ctx, &p);
344+
return ctx.common_seq(&p);
318345
}
319346
// inserting all successors in the queue
320-
let succs = get_successors(&ctx, &p);
347+
let succs = ctx.get_successors(&p);
321348
for q in succs {
322349
// basically saying if the queue does not already
323350
// contain the point q
324351
if !queue.contains(&q) {
325-
update_suc(&mut ctx, p.clone(), q.clone());
352+
ctx.update_suc(p.clone(), q.clone());
326353
queue.push(q);
327354
}
328355
}
329356
}
330357
// sorting the queue
331-
reorder_queue(&ctx, &mut queue);
358+
ctx.reorder_queue(&mut queue);
332359
}
333360
String::from("")
334361
}
335362

336-
// sorts the queue
337-
fn reorder_queue(ctx: &Context, queue: &mut [Vec<usize>]) {
338-
queue.sort_unstable_by(|p, q| {
339-
if (ctx.f.get(p) > ctx.f.get(q))
340-
|| (ctx.f.get(p) == ctx.f.get(q) && heuristic(ctx, p) > heuristic(ctx, q))
341-
{
342-
Ordering::Greater
343-
} else {
344-
Ordering::Less
345-
}
346-
});
347-
}
348363

349364
/// Computes the suffix table
350365
fn score_matrix(s1: &[char], s2: &[char]) -> Vec<Vec<u64>> {
@@ -367,22 +382,11 @@ fn score_matrix(s1: &[char], s2: &[char]) -> Vec<Vec<u64>> {
367382
matrix
368383
}
369384

370-
//given given 2D coordinates, translates into 1D coordinates
371-
fn translate(i: usize, j: usize, d: usize) -> usize {
385+
// given 2D coordinates (i, j), converts them into a 1D (linear) index
386+
fn to_linear_index(i: usize, j: usize, d: usize) -> usize {
372387
i * d + j
373388
}
374389

375-
// given a point p and his successor q, computes necessary informations
376-
// point p is marked PARENT of q
377-
fn update_suc(ctx: &mut Context, p: Vec<usize>, q: Vec<usize>) {
378-
// g(q) = g(p) + 1
379-
let nb = &ctx.g[&p] + 1;
380-
ctx.g.insert(q.clone(), nb);
381-
// saves the cost function for point p : h(p) + g(p)
382-
ctx.f.insert(q.clone(), heuristic(ctx, &q) + nb);
383-
// saves the fact that p is the parent of q
384-
ctx.parents.insert(q, Some(p));
385-
}
386390

387391
#[cfg(test)]
388392
mod tests {

0 commit comments

Comments
 (0)