Skip to content

Commit d519814

Browse files
committed
Give the amplicon-finding script a no-trim escape hatch: when enabled, it only finds amplicons and leaves the primers untrimmed
1 parent c857224 commit d519814

File tree

1 file changed

+47
-4
lines changed

1 file changed

+47
-4
lines changed

bin/find_and_trim_amplicons.rs

Lines changed: 47 additions & 4 deletions
Original file line numberDiff line numberDiff line change
@@ -175,6 +175,10 @@ struct Args {
175175
#[arg(long)]
176176
no_compress: bool,
177177

178+
/// Disable primer trimming and only find amplicons
179+
#[arg(long)]
180+
no_trim: bool,
181+
178182
/// Statistics output file (default: stderr)
179183
#[arg(long)]
180184
stats: Option<PathBuf>,
@@ -606,6 +610,8 @@ struct PrimerTrimProcessor {
606610
output_format: OutputFormat,
607611
/// Shared statistics tracker (thread-safe)
608612
stats: SharedStats,
613+
/// When true, find amplicons but write the full, untrimmed sequence and quality instead of the amplicon slice
614+
find_only: bool,
609615
}
610616

611617
/// Configured primer search with optional windowing.
@@ -801,6 +807,7 @@ impl PrimerTrimProcessor {
801807
writer: SharedWriter,
802808
output_format: OutputFormat,
803809
stats: SharedStats,
810+
find_only: bool,
804811
) -> Self {
805812
Self {
806813
search: WindowedSearch::new(primers, max_mismatch, forward_window, reverse_window),
@@ -810,6 +817,7 @@ impl PrimerTrimProcessor {
810817
writer,
811818
output_format,
812819
stats,
820+
find_only,
813821
}
814822
}
815823

@@ -842,7 +850,11 @@ impl PrimerTrimProcessor {
842850

843851
// SAFETY: bounds validated by assertions (start <= end, start <= len) and bail check (end <= len);
// the slice is only taken when `find_only` is false
844852
#[allow(clippy::indexing_slicing)]
845-
let trimmed_seq = &sequence[amplicon.start..amplicon.end];
853+
let trimmed_seq = if self.find_only {
854+
&sequence
855+
} else {
856+
&sequence[amplicon.start..amplicon.end]
857+
};
846858

847859
let mut writer = self.writer.lock().expect("writer mutex poisoned");
848860

@@ -866,7 +878,11 @@ impl PrimerTrimProcessor {
866878
// SAFETY: start <= end (asserted above), end <= quality.len() (bail check above)
867879
// Therefore start <= quality.len()
868880
#[allow(clippy::indexing_slicing)]
869-
let trimmed_qual = &quality[amplicon.start..amplicon.end];
881+
let trimmed_qual = if self.find_only {
882+
quality
883+
} else {
884+
&quality[amplicon.start..amplicon.end]
885+
};
870886

871887
// Write FASTQ record
872888
writeln!(writer, "@{}", record.id_str())?;
@@ -1205,6 +1221,7 @@ fn main() -> Result<()> {
12051221
writer,
12061222
args.format,
12071223
stats.clone(),
1224+
args.no_trim,
12081225
);
12091226

12101227
// Read and process in parallel
@@ -1309,6 +1326,7 @@ mod tests {
13091326
writer,
13101327
OutputFormat::Fastq,
13111328
stats,
1329+
false,
13121330
)
13131331
}
13141332

@@ -1577,6 +1595,7 @@ mod tests {
15771595
writer,
15781596
OutputFormat::Fastq,
15791597
stats.clone(),
1598+
false,
15801599
);
15811600

15821601
// Read: ACGT + ATATATAT + TGCA (RC of TGCA is TGCA)
@@ -1626,6 +1645,7 @@ mod tests {
16261645
writer,
16271646
OutputFormat::Fastq,
16281647
stats.clone(),
1648+
false,
16291649
);
16301650

16311651
// Read with no primers
@@ -1669,6 +1689,7 @@ mod tests {
16691689
writer,
16701690
OutputFormat::Fastq,
16711691
stats.clone(),
1692+
false,
16721693
);
16731694

16741695
// Test 1: No forward primer
@@ -1812,8 +1833,18 @@ mod tests {
18121833
let (writer, _buffer) = create_test_writer();
18131834
let stats = Arc::new(TrimStats::default());
18141835

1815-
let processor =
1816-
PrimerTrimProcessor::new(primers, 0, 5, 100, 0, 0, writer, OutputFormat::Fastq, stats);
1836+
let processor = PrimerTrimProcessor::new(
1837+
primers,
1838+
0,
1839+
5,
1840+
100,
1841+
0,
1842+
0,
1843+
writer,
1844+
OutputFormat::Fastq,
1845+
stats,
1846+
false,
1847+
);
18171848

18181849
let amplicon = AmpliconMatch {
18191850
start: 4,
@@ -1879,6 +1910,7 @@ mod tests {
18791910
writer,
18801911
OutputFormat::Fastq,
18811912
stats.clone(),
1913+
false,
18821914
);
18831915

18841916
// Process a read in forward orientation
@@ -1914,6 +1946,7 @@ mod tests {
19141946
max_len: 2000,
19151947
threads: 4,
19161948
format: OutputFormat::Fastq,
1949+
no_trim: false,
19171950
no_compress: true,
19181951
stats: None,
19191952
forward_window: 0,
@@ -1943,6 +1976,7 @@ mod tests {
19431976
max_len: 2000,
19441977
threads: 4,
19451978
format: OutputFormat::Fastq,
1979+
no_trim: false,
19461980
no_compress: true,
19471981
stats: None,
19481982
forward_window: 0,
@@ -1975,6 +2009,7 @@ mod tests {
19752009
max_len: 2000,
19762010
threads: 4,
19772011
format: OutputFormat::Fastq,
2012+
no_trim: false,
19782013
no_compress: true,
19792014
stats: None,
19802015
forward_window: 0,
@@ -2007,6 +2042,7 @@ mod tests {
20072042
max_len: 50,
20082043
threads: 4,
20092044
format: OutputFormat::Fastq,
2045+
no_trim: false,
20102046
no_compress: true,
20112047
stats: None,
20122048
forward_window: 0,
@@ -2039,6 +2075,7 @@ mod tests {
20392075
max_len: 2000,
20402076
threads: 0, // Invalid
20412077
format: OutputFormat::Fastq,
2078+
no_trim: false,
20422079
no_compress: true,
20432080
stats: None,
20442081
forward_window: 0,
@@ -2071,6 +2108,7 @@ mod tests {
20712108
max_len: 2000,
20722109
threads: 4,
20732110
format: OutputFormat::Fastq,
2111+
no_trim: false,
20742112
no_compress: true,
20752113
stats: None,
20762114
forward_window: 0,
@@ -2136,6 +2174,7 @@ mod tests {
21362174
writer,
21372175
OutputFormat::Fastq,
21382176
stats,
2177+
false,
21392178
);
21402179

21412180
let amplicon = AmpliconMatch {
@@ -2188,6 +2227,7 @@ mod tests {
21882227
writer,
21892228
OutputFormat::Fasta,
21902229
stats,
2230+
false,
21912231
);
21922232

21932233
let amplicon = AmpliconMatch {
@@ -2237,6 +2277,7 @@ mod tests {
22372277
writer,
22382278
OutputFormat::Fastq,
22392279
stats,
2280+
false,
22402281
);
22412282

22422283
let amplicon = AmpliconMatch {
@@ -2276,6 +2317,7 @@ mod tests {
22762317
max_len: 2000,
22772318
threads: 1,
22782319
format: OutputFormat::Fastq,
2320+
no_trim: false,
22792321
no_compress: true,
22802322
stats: None,
22812323
forward_window: 0,
@@ -2317,6 +2359,7 @@ mod tests {
23172359
max_len: 2000,
23182360
threads: 1,
23192361
format: OutputFormat::Fastq,
2362+
no_trim: false,
23202363
no_compress: false,
23212364
stats: None,
23222365
forward_window: 0,

0 commit comments

Comments
 (0)