Skip to content

Commit 3716d46

Browse files
committed
Include parallelized rust preprocessing functions
1 parent f764a69 commit 3716d46

File tree

2 files changed

+57
-0
lines changed

2 files changed

+57
-0
lines changed

utils/genes-preprocessing/Cargo.toml

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -9,3 +9,4 @@ crate-type = ["cdylib"]
99

1010
[dependencies]
1111
pyo3 = "0.27.0"
12+
rayon = "1.11.0"

utils/genes-preprocessing/src/lib.rs

Lines changed: 56 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -4,6 +4,7 @@ use pyo3::prelude::*;
44
#[pymodule]
55
mod gene_preprocessing {
66
use pyo3::prelude::*;
7+
use rayon::prelude::*;
78
/// Formats the sum of two numbers as string.
89
#[pyfunction]
910
fn sum_as_string(a: usize, b: usize) -> PyResult<String> {
@@ -52,10 +53,65 @@ mod gene_preprocessing {
5253
Ok(count)
5354
}
5455

56+
#[pyfunction]
57+
fn count_overlap_batch(
58+
all_exons_a: Vec<Vec<Vec<i64>>>, // [pair_idx][exon_idx][start, end]
59+
all_exons_b: Vec<Vec<Vec<i64>>>,
60+
threshold: f32,
61+
) -> PyResult<Vec<i32>> {
62+
let results = all_exons_a
63+
.par_iter()
64+
.zip(all_exons_b.par_iter())
65+
.map(|(exons_a, exons_b)| {
66+
// Your existing logic, just inlined
67+
exons_a
68+
.iter()
69+
.zip(exons_b.iter())
70+
.filter(|(ea, eb)| {
71+
let start_a = ea[0];
72+
let end_a = ea[1];
73+
let start_b = eb[0];
74+
let end_b = eb[1];
75+
76+
let length_a = end_a - start_a;
77+
let length_b = end_b - start_b;
78+
79+
let overlap_start = start_a.max(start_b);
80+
let overlap_end = end_a.min(end_b);
81+
let overlap_length = (overlap_end - overlap_start).max(0);
82+
83+
if overlap_length == 0 {
84+
return false;
85+
}
86+
87+
let overlap_ratio_a = overlap_length as f32 / length_a as f32;
88+
let overlap_ratio_b = overlap_length as f32 / length_b as f32;
89+
overlap_ratio_a.min(overlap_ratio_b) >= threshold
90+
})
91+
.count() as i32
92+
})
93+
.collect();
94+
95+
Ok(results)
96+
}
97+
5598
#[pyfunction]
5699
fn distance_to_agreement(exon_a_5p: Vec<i64>, exon_b_5p: Vec<i64>) -> PyResult<Vec<i64>> {
57100
Ok(vec![(exon_a_5p[0] - exon_b_5p[0]).abs(), (exon_a_5p[1] - exon_b_5p[1]).abs()])
58101
// Ok(exon_a_5p.iter().zip(exon_b_5p.iter()).map(|(xa, xb)|->
59102
// Vec<i64>{vec![(xa[0]-xb[0]).abs(), (xa[1]-xb[1]).abs()]}).collect())
60103
}
104+
105+
#[pyfunction]
106+
fn distance_to_agreement_batch(
107+
exon_a_5p: Vec<Vec<i64>>,
108+
exon_b_5p: Vec<Vec<i64>>,
109+
) -> PyResult<(Vec<i64>, Vec<i64>)> {
110+
let (diffs_0, diffs_1): (Vec<i64>, Vec<i64>) = exon_a_5p
111+
.par_iter()
112+
.zip(exon_b_5p.par_iter())
113+
.map(|(xa, xb)| ((xa[0] - xb[0]).abs(), (xa[1] - xb[1]).abs()))
114+
.unzip();
115+
Ok((diffs_0, diffs_1))
116+
}
61117
}

0 commit comments

Comments
 (0)