Skip to content

Commit ec8e9de

Browse files
fxwiegandtedil
andauthored
Add new parameter --max-read-depth to the vcf-report (#76)
* Add max-read-depth to CLI * Change rows to HashSet * fmt * Update src/bcf/report/table_report/static_reader.rs Co-authored-by: Till Hartmann <till.hartmann@udo.edu> * Update src/bcf/report/table_report/static_reader.rs Co-authored-by: Till Hartmann <till.hartmann@udo.edu> * Addition to suggestion from @tedil Co-authored-by: Till Hartmann <till.hartmann@udo.edu>
1 parent 3c02fed commit ec8e9de

File tree

6 files changed

+47
-6
lines changed

6 files changed

+47
-6
lines changed

Cargo.toml

Lines changed: 2 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -21,7 +21,8 @@ rustc-serialize = "0.3"
2121
quick-error = "1.2"
2222
newtype_derive = "0.1"
2323
custom_derive = "0.1"
24-
rand = "0.4"
24+
rand = "0.7.3"
25+
rand_core = "0.5.1"
2526
cogset = "0.2"
2627
num-bigint = "0.2"
2728
serde = "1.0"

src/bcf/report/table_report/create_report_table.rs

Lines changed: 8 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -37,6 +37,7 @@ pub struct Report {
3737
vis: HashMap<String, String>,
3838
}
3939

40+
#[allow(clippy::too_many_arguments)]
4041
pub(crate) fn make_table_report(
4142
vcf_path: &Path,
4243
fasta_path: &Path,
@@ -45,6 +46,7 @@ pub(crate) fn make_table_report(
4546
formats: Option<Vec<&str>>,
4647
sample: String,
4748
output_path: &str,
49+
max_read_depth: u32,
4850
) -> Result<(), Box<dyn Error>> {
4951
// HashMap<gene: String, Vec<Report>>, Vec<ann_field_identifiers: String>
5052
let mut reports = HashMap::new();
@@ -292,6 +294,7 @@ pub(crate) fn make_table_report(
292294
chrom.clone(),
293295
0,
294296
end_position as u64 + 75,
297+
max_read_depth,
295298
);
296299
visualization =
297300
manipulate_json(content, 0, end_position as u64 + 75, max_rows);
@@ -303,6 +306,7 @@ pub(crate) fn make_table_report(
303306
chrom.clone(),
304307
pos as u64 - 75,
305308
fasta_length - 1,
309+
max_read_depth,
306310
);
307311
visualization =
308312
manipulate_json(content, pos as u64 - 75, fasta_length - 1, max_rows);
@@ -314,6 +318,7 @@ pub(crate) fn make_table_report(
314318
chrom.clone(),
315319
pos as u64 - 75,
316320
end_position as u64 + 75,
321+
max_read_depth,
317322
);
318323
visualization = manipulate_json(
319324
content,
@@ -408,6 +413,7 @@ fn create_report_data(
408413
chrom: String,
409414
from: u64,
410415
to: u64,
416+
max_read_depth: u32,
411417
) -> (Json, usize) {
412418
let mut data = Vec::new();
413419

@@ -416,7 +422,8 @@ fn create_report_data(
416422
data.push(nucleobase);
417423
}
418424

419-
let (bases, matches, max_rows) = get_static_reads(bam_path, fasta_path, chrom, from, to);
425+
let (bases, matches, max_rows) =
426+
get_static_reads(bam_path, fasta_path, chrom, from, to, max_read_depth);
420427

421428
for b in bases {
422429
let base = json!(b);

src/bcf/report/table_report/mod.rs

Lines changed: 3 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -10,6 +10,7 @@ use std::error::Error;
1010
use std::fs;
1111
use std::path::Path;
1212

13+
#[allow(clippy::too_many_arguments)]
1314
pub fn table_report(
1415
vcf: &str,
1516
fasta: &str,
@@ -18,6 +19,7 @@ pub fn table_report(
1819
sample: &str,
1920
info: Option<Values>,
2021
format: Option<Values>,
22+
max_read_depth: u32,
2123
) -> Result<(), Box<dyn Error>> {
2224
let detail_path = output_path.to_owned() + "/details/" + sample;
2325
fs::create_dir(Path::new(&detail_path)).unwrap_or_else(|_| {
@@ -49,5 +51,6 @@ pub fn table_report(
4951
format_strings,
5052
sample.to_owned(),
5153
output_path,
54+
max_read_depth,
5255
)?)
5356
}

src/bcf/report/table_report/static_reader.rs

Lines changed: 26 additions & 4 deletions
Original file line numberDiff line numberDiff line change
@@ -2,8 +2,11 @@ use crate::bcf::report::table_report::alignment_reader::{
22
make_nucleobases, read_indexed_bam, AlignmentMatch, AlignmentNucleobase,
33
};
44
use crate::bcf::report::table_report::create_report_table::VariantType;
5+
use rand::rngs::StdRng;
6+
use rand::seq::IteratorRandom;
7+
use rand_core::SeedableRng;
58
use serde::Serialize;
6-
use std::collections::BTreeMap;
9+
use std::collections::{BTreeMap, HashSet};
710
use std::path::Path;
811

912
#[derive(Serialize, Clone, Debug)]
@@ -34,12 +37,13 @@ pub struct Variant {
3437
fn calc_rows(
3538
reads: Vec<AlignmentNucleobase>,
3639
matches: Vec<AlignmentMatch>,
40+
max_read_depth: u32,
3741
) -> (
3842
Vec<StaticAlignmentNucleobase>,
3943
Vec<StaticAlignmentMatch>,
4044
usize,
4145
) {
42-
let mut row_ends = vec![0; 1000];
46+
let mut row_ends = vec![0; 10000];
4347

4448
let mut read_names: BTreeMap<String, u16> = BTreeMap::new();
4549

@@ -54,7 +58,7 @@ fn calc_rows(
5458
if read_names.contains_key(&r.name) {
5559
row = *read_names.get(&r.name).unwrap();
5660
} else {
57-
for (i, _) in row_ends.iter().enumerate().take(1000).skip(1) {
61+
for (i, _) in row_ends.iter().enumerate().take(10000).skip(1) {
5862
if r.read_start > row_ends[i] {
5963
if i > max_row {
6064
max_row = i;
@@ -102,6 +106,23 @@ fn calc_rows(
102106
reads_wr.push(base);
103107
}
104108

109+
if max_row > max_read_depth as usize {
110+
let mut rng = StdRng::seed_from_u64(42);
111+
let random_rows: HashSet<_> = (0..max_row as u32)
112+
.choose_multiple(&mut rng, max_read_depth as usize)
113+
.into_iter()
114+
.collect();
115+
reads_wr = reads_wr
116+
.into_iter()
117+
.filter(|b| random_rows.contains(&&(b.row as u32)))
118+
.collect();
119+
matches_wr = matches_wr
120+
.into_iter()
121+
.filter(|b| random_rows.contains(&&(b.row as u32)))
122+
.collect();
123+
max_row = max_read_depth as usize;
124+
}
125+
105126
(reads_wr, matches_wr, max_row)
106127
}
107128

@@ -111,12 +132,13 @@ pub fn get_static_reads(
111132
chrom: String,
112133
from: u64,
113134
to: u64,
135+
max_read_depth: u32,
114136
) -> (
115137
Vec<StaticAlignmentNucleobase>,
116138
Vec<StaticAlignmentMatch>,
117139
usize,
118140
) {
119141
let alignments = read_indexed_bam(path, chrom.clone(), from, to);
120142
let (msm, m) = make_nucleobases(fasta_path, chrom, alignments, from, to);
121-
calc_rows(msm, m)
143+
calc_rows(msm, m, max_read_depth)
122144
}

src/cli.yaml

Lines changed: 5 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -239,6 +239,11 @@ subcommands:
239239
short: c
240240
default_value: "1000"
241241
help: Set the maximum number of cells in the oncoprint per page. Lowering max-cells should improve the performance of the plots in the browser. Default value is 1000.
242+
- max-read-depth:
243+
required: false
244+
short: d
245+
default_value: "500"
246+
help: Set the maximum read depth for plots of the third stage. If the maximum depth is exceeded, rows are randomly (with seed for reproducibility) chosen. Default value is 500.
242247
- info:
243248
multiple: true
244249
required: false

src/main.rs

Lines changed: 3 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -86,6 +86,8 @@ fn main() -> Result<(), Box<dyn Error>> {
8686
});
8787
}
8888
let max_cells = u32::from_str(matches.value_of("max-cells").unwrap()).unwrap();
89+
let max_read_depth =
90+
u32::from_str(matches.value_of("max-read-depth").unwrap()).unwrap();
8991
let custom_js = matches.value_of("custom-js");
9092
let tsv_data = matches.value_of("tsv");
9193
bcf::report::embed_js(output_path, custom_js)?;
@@ -123,6 +125,7 @@ fn main() -> Result<(), Box<dyn Error>> {
123125
sample,
124126
infos.clone(),
125127
formats.clone(),
128+
max_read_depth,
126129
)?;
127130
}
128131

0 commit comments

Comments
 (0)