diff --git a/goombay-rs/Cargo.lock b/goombay-rs/Cargo.lock index a9794e3..f745f04 100644 --- a/goombay-rs/Cargo.lock +++ b/goombay-rs/Cargo.lock @@ -2,13 +2,70 @@ # It is not intended for manual editing. version = 4 +[[package]] +name = "autocfg" +version = "1.5.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "c08606f8c3cbf4ce6ec8e28fb0014a2c086708fe954eaa885384a6165172e7e8" + [[package]] name = "goombay-rs" version = "0.2.1" dependencies = [ + "ndarray", "spindalis", ] +[[package]] +name = "matrixmultiply" +version = "0.3.10" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "a06de3016e9fae57a36fd14dba131fccf49f74b40b7fbdb472f96e361ec71a08" +dependencies = [ + "autocfg", + "rawpointer", +] + +[[package]] +name = "ndarray" +version = "0.15.6" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "adb12d4e967ec485a5f71c6311fe28158e9d6f4bc4a447b474184d0f91a8fa32" +dependencies = [ + "matrixmultiply", + "num-complex", + "num-integer", + "num-traits", + "rawpointer", +] + +[[package]] +name = "num-complex" +version = "0.4.6" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "73f88a1307638156682bada9d7604135552957b7818057dcef22705b4d509495" +dependencies = [ + "num-traits", +] + +[[package]] +name = "num-integer" +version = "0.1.46" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "7969661fd2958a5cb096e56c8e1ad0444ac2bbcd0061bd28660485a44879858f" +dependencies = [ + "num-traits", +] + +[[package]] +name = "num-traits" +version = "0.2.19" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "071dfc062690e90b734c0b2273ce72ad0ffa95f0c74596bc250dcfd960262841" +dependencies = [ + "autocfg", +] + [[package]] name = "proc-macro2" version = "1.0.103" @@ -27,6 +84,12 @@ dependencies = [ "proc-macro2", ] +[[package]] +name = "rawpointer" +version = "0.2.1" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "60a357793950651c4ed0f3f52338f53b2f809f32d83a07f72909fa13e4c6c1e3" + [[package]] name = "spindalis" version = "0.4.6" diff --git a/goombay-rs/Cargo.toml b/goombay-rs/Cargo.toml index 1055a33..ce07728 100644 --- a/goombay-rs/Cargo.toml +++ b/goombay-rs/Cargo.toml @@ -5,3 +5,4 @@ edition = "2024" [dependencies] spindalis = "0.4.6" +ndarray = "0.15" \ No newline at end of file diff --git a/goombay-rs/src/alignment/edit/gotoh.rs b/goombay-rs/src/alignment/edit/gotoh.rs new file mode 100644 index 0000000..02cc759 --- /dev/null +++ b/goombay-rs/src/alignment/edit/gotoh.rs @@ -0,0 +1,160 @@ +use crate::align::global_base::{GlobalAlgorithm, GlobalAlignmentModel, Metric}; +use crate::align::scoring::ExtendedGapScoring; +use crate::align::{AlignmentData, GlobalAlignmentMatrix, PointerValues, Scoring}; + +pub struct Gotoh { + pub scores: S, +} + +impl Default for Gotoh { + fn default() -> Self { + let scores = ExtendedGapScoring { + identity: 2, + mismatch: 1, + gap: 3, + extended_gap: 1, + }; + Self { scores } + } +} + +impl GlobalAlignmentMatrix for Gotoh { + fn compute(query: &str, subject: &str) -> GlobalAlignmentModel { + // Use default scores to calculate scoring and pointer matrices + let gotoh_default = Gotoh::default(); + gotoh_default.calculate_matrix(query, subject) + } + + fn set_scores(scores: &ExtendedGapScoring) -> Self { + // Set custom scores before manually calculating matrices + Self { + scores: scores.clone(), + } + } + + fn calculate_matrix(&self, query: &str, subject: &str) -> GlobalAlignmentModel { + let mut alignments = AlignmentData::new_gotoh(query, subject); + let query_len = alignments.query.len() + 1; + let subject_len = alignments.subject.len() + 1; + + // D, P, Q matrices correspond to score_matrix[0], score_matrix[1], score_matrix[2] + let (score_first, score_rest) = alignments.score_matrix.split_at_mut(1); + let d = &mut score_first[0]; + let (score_second, score_third) = score_rest.split_at_mut(1); + let p = &mut score_second[0]; + let q = &mut score_third[0]; + + let (ptr_first, ptr_rest) = alignments.pointer_matrix.split_at_mut(1); + let d_ptr = &mut ptr_first[0]; + let (ptr_second, ptr_third) = ptr_rest.split_at_mut(1); + let p_ptr = &mut ptr_second[0]; + let q_ptr = &mut ptr_third[0]; + + d[0][0] = 0; + for i in 1..query_len { + d[i][0] = -(self.scores.gap as i32 + (i as i32 * self.scores.extended_gap as i32)); + d_ptr[i][0] = PointerValues::Up as i32; + } + for j in 1..subject_len { + d[0][j] = -(self.scores.gap as i32 + (j as i32 * self.scores.extended_gap as i32)); + d_ptr[0][j] = PointerValues::Left as i32; + } + + // Initialize P matrix (gap extension in query) + for i in 0..query_len { + p[i][0] = 0; + } + + // Initialize Q matrix (gap extension in subject) + for j in 0..subject_len { + q[0][j] = 0; + } + + // Fill the matrices + for i in 1..query_len { + for j in 1..subject_len { + // Calculate substitution score + let substitution = if alignments.query[i - 1] == alignments.subject[j - 1] { + self.scores.identity as i32 + } else { + -(self.scores.mismatch as i32) + }; + + // Calculate D matrix (match/mismatch) + let d_match = d[i - 1][j - 1] + substitution; + let d_from_p = p[i - 1][j - 1] + substitution; + let d_from_q = q[i - 1][j - 1] + substitution; + d[i][j] = [d_match, d_from_p, d_from_q] + .iter() + .max() + .copied() + .unwrap_or(i32::MIN); + + // Calculate P matrix (gap extension in query) + let p_gap_open = + d[i - 1][j] - (self.scores.gap as i32 + self.scores.extended_gap as i32); + let p_gap_extend = p[i - 1][j] - (self.scores.extended_gap as i32); + p[i][j] = [p_gap_open, p_gap_extend] + .iter() + .max() + .copied() + .unwrap_or(i32::MIN); + + // Calculate Q matrix (gap extension in subject) + let q_gap_open = + d[i][j - 1] - (self.scores.gap as i32 + self.scores.extended_gap as i32); + let q_gap_extend = q[i][j - 1] - (self.scores.extended_gap as i32); + q[i][j] = [q_gap_open, q_gap_extend] + .iter() + .max() + .copied() + .unwrap_or(i32::MIN); + + // Set pointers for P matrix + if p[i][j] == p_gap_open { + p_ptr[i][j] = PointerValues::Up as i32; + p_ptr[i - 1][j] = PointerValues::Match as i32; + } else if p[i][j] == p_gap_extend { + p_ptr[i][j] = PointerValues::Up as i32; + p_ptr[i - 1][j] = PointerValues::Up as i32; + } + + // Set pointers for Q matrix + if q[i][j] == q_gap_open { + q_ptr[i][j] = PointerValues::Left as i32; + q_ptr[i][j - 1] = PointerValues::Match as i32; + } else if q[i][j] == q_gap_extend { + q_ptr[i][j] = PointerValues::Left as i32; + q_ptr[i][j - 1] = PointerValues::Left as i32; + } + + // Set pointers for D matrix based on which matrix gave the maximum score + let max_score = [d[i][j], p[i][j], q[i][j]] + .iter() + .max() + .copied() + .unwrap_or(i32::MIN); + + if max_score == d[i][j] { + d_ptr[i][j] += PointerValues::Match as i32; + } + if max_score == p[i][j] { + d_ptr[i][j] += PointerValues::Up as i32; + } + if max_score == q[i][j] { + d_ptr[i][j] += PointerValues::Left as i32; + } + } + } + + GlobalAlignmentModel { + data: alignments, + aligner: GlobalAlgorithm::Gotoh, + metric: Metric::Similarity, + identity: self.scores.identity, + mismatch: self.scores.mismatch, + gap: self.scores.gap, + all_alignments: false, + } + } +} diff --git a/goombay-rs/src/alignment/edit/mod.rs b/goombay-rs/src/alignment/edit/mod.rs index 5d34521..1de8908 100644 --- a/goombay-rs/src/alignment/edit/mod.rs +++ b/goombay-rs/src/alignment/edit/mod.rs @@ -1,3 +1,4 @@ +pub mod gotoh; pub mod needleman_wunsch; pub mod smith_waterman; pub mod wagner_fischer; diff --git a/goombay-rs/src/alignment/global_base.rs b/goombay-rs/src/alignment/global_base.rs index 0c586d5..b4e1f01 100644 --- a/goombay-rs/src/alignment/global_base.rs +++ b/goombay-rs/src/alignment/global_base.rs @@ -5,6 +5,7 @@ use spindalis::utils::Arr2D; pub enum GlobalAlgorithm { NeedlemanWunsch, WagnerFischer, + Gotoh, } // Handles matrices that store similarity score vs distance score @@ -40,7 +41,9 @@ impl GlobalAlignmentModel { fn select_aligner(&self) -> Box + '_> { match self.aligner { - GlobalAlgorithm::NeedlemanWunsch | GlobalAlgorithm::WagnerFischer => { + GlobalAlgorithm::NeedlemanWunsch + | GlobalAlgorithm::WagnerFischer + | GlobalAlgorithm::Gotoh => { let i = self.data.query.len(); let j = self.data.subject.len(); let global_aligner = GlobalAligner {