Skip to content

Commit 1bad536

Browse files
committed
added multi-batch processing for deltaG and fixed bug for batch calculation
1 parent 5fc0795 commit 1bad536

File tree

3 files changed

+46
-39
lines changed

3 files changed

+46
-39
lines changed

varvamp/scripts/primers.py

Lines changed: 11 additions & 9 deletions
Original file line numberDiff line numberDiff line change
@@ -9,7 +9,7 @@
99
import functools
1010

1111
# LIBS
12-
from Bio.Seq import Seq
12+
from Bio.Seq import MutableSeq
1313
from Bio import SeqIO
1414
import primer3 as p3
1515

@@ -100,9 +100,10 @@ def is_dimer(seq1, seq2):
100100
check if two sequences dimerize above threshold or are overlapping at their ends
101101
"""
102102
dimer_result = calc_dimer(seq1, seq2, structure=True)
103-
103+
# check both the temperature and the deltaG
104104
if dimer_result.tm > config.PRIMER_MAX_DIMER_TMP or dimer_result.dg < config.PRIMER_MAX_DIMER_DELTAG:
105105
return True
106+
# check for perfect end overlaps (this can result in primer extensions even though the tm/dg are okay)
106107
if has_end_overlap(dimer_result):
107108
return True
108109

@@ -175,7 +176,7 @@ def rev_complement(seq):
175176
"""
176177
reverse complement a sequence
177178
"""
178-
return str(Seq(seq).reverse_complement())
179+
return str(MutableSeq(seq).reverse_complement(inplace=True))
179180

180181

181182
def calc_permutation_penalty(amb_seq):
@@ -353,18 +354,18 @@ def _process_kmer_batch(ambiguous_consensus, alignment, kmers):
353354
for kmer in kmers:
354355
if not filter_kmer_direction_independent(kmer[0]):
355356
continue
356-
357+
# calc penalties
357358
base_penalty = calc_base_penalty(kmer[0], config.PRIMER_TMP, config.PRIMER_GC_RANGE, config.PRIMER_SIZES)
358359
per_base_mismatches = calc_per_base_mismatches(kmer, alignment, ambiguous_consensus)
359360
permutation_penalty = calc_permutation_penalty(ambiguous_consensus[kmer[1]:kmer[2]])
360-
361+
# some filters depend on the direction of each primer
361362
for direction in ["+", "-"]:
362363
if not filter_kmer_direction_dependend(direction, kmer, ambiguous_consensus):
363364
continue
364-
365+
# calc penalties
365366
three_prime_penalty = calc_3_prime_penalty(direction, per_base_mismatches)
366367
primer_penalty = base_penalty + permutation_penalty + three_prime_penalty
367-
368+
# add to lists depending on their direction
368369
if direction == "+":
369370
left_primers.append([kmer[0], kmer[1], kmer[2], primer_penalty, per_base_mismatches])
370371
else:
@@ -383,7 +384,7 @@ def find_primers(kmers, ambiguous_consensus, alignment, num_processes):
383384

384385
# Convert kmers set to list for slicing
385386
kmers = list(kmers)
386-
batch_size = int(len(kmers)/num_processes)
387+
batch_size = max(1, int(len(kmers)/num_processes))
387388

388389
# Split kmers into batches
389390
batches = [kmers[i:i + batch_size] for i in range(0, len(kmers), batch_size)]
@@ -416,7 +417,7 @@ def create_primer_dictionary(primer_candidates, direction):
416417
for primer in primer_candidates:
417418
if direction == "+":
418419
direction_name = "LEFT"
419-
elif direction == "-":
420+
else:
420421
direction_name = "RIGHT"
421422
primer_name = f"{direction_name}_{primer_idx}"
422423
primer_dict[primer_name] = primer
@@ -540,6 +541,7 @@ def filter_non_dimer_candidates(primer_candidates, external_sequences, n_process
540541

541542
with multiprocessing.Pool(processes=n_processes) as pool:
542543
# Prepare arguments based on input type
544+
# qpcr probes are stored in dictionaries --> result in tuples when unpacked
543545
if is_dict:
544546
results = pool.map(callable_f, primer_candidates.items())
545547
else:

varvamp/scripts/qpcr.py

Lines changed: 30 additions & 29 deletions
Original file line numberDiff line numberDiff line change
@@ -99,7 +99,7 @@ def get_qpcr_probes(kmers, ambiguous_consensus, alignment_cleaned, num_processes
9999
kmers = list(kmers)
100100

101101
# Split kmers into batches
102-
batch_size = int(len(kmers) / num_processes)
102+
batch_size = max(1, int(len(kmers) / num_processes))
103103
batches = [kmers[i:i + batch_size] for i in range(0, len(kmers), batch_size)]
104104

105105
# Prepare arguments for each dimer
@@ -282,7 +282,7 @@ def find_qcr_schemes(qpcr_probes, left_primer_candidates, right_primer_candidate
282282
amplicon_nr = -1
283283

284284
# Prepare arguments for parallel processing - pass full primer lists
285-
batch_size = int(len(qpcr_probes) / num_processes)
285+
batch_size = max(1, int(len(qpcr_probes) / num_processes))
286286
callable_f = functools.partial(
287287
find_single_qpcr_scheme,
288288
left_primer_candidates, right_primer_candidates, qpcr_probes, majority_consensus, ambiguous_consensus
@@ -315,7 +315,7 @@ def find_qcr_schemes(qpcr_probes, left_primer_candidates, right_primer_candidate
315315
return qpcr_scheme_candidates
316316

317317

318-
def process_single_amplicon_deltaG(amplicon, majority_consensus):
318+
def process_single_amplicon_deltaG(majority_consensus, amplicon):
319319
"""
320320
Process a single amplicon to test its deltaG and apply filtering.
321321
This function will be called concurrently by multiple threads.
@@ -341,32 +341,33 @@ def test_amplicon_deltaG_parallel(qpcr_schemes_candidates, majority_consensus, n
341341
"""
342342
final_amplicons = []
343343

344-
# Create a pool of processes to handle the concurrent processing
344+
# Create a list of the first n amplicon tuples for processing
345+
# The list is sorted first on whether offset targets were predicted for the amplicon,
346+
# then by penalty. This ensures that amplicons with offset targets are always considered last
347+
amplicons = list(sorted(qpcr_schemes_candidates, key=lambda x: (x.get("offset_targets", False), x["penalty"])))
348+
# process amplicons concurrently
349+
batch_size = max(1, int(n_to_test / n_processes))
350+
callable_f = functools.partial(
351+
process_single_amplicon_deltaG,
352+
majority_consensus
353+
)
345354
with multiprocessing.Pool(processes=n_processes) as pool:
346-
# Create a list of the first n amplicon tuples for processing
347-
# The list is sorted first on whether offset targets were predicted for the amplicon,
348-
# then by penalty. This ensures that amplicons with offset targets are always considered last
349-
amplicons = itertools.islice(
350-
sorted(qpcr_schemes_candidates, key=lambda x: (x.get("offset_targets", False), x["penalty"])),
351-
n_to_test
352-
)
353-
# process amplicons concurrently
354-
results = pool.starmap(process_single_amplicon_deltaG, [(amp, majority_consensus) for amp in amplicons])
355-
# Process the results
356-
retained_ranges = []
357-
for amp in results:
358-
# check if the amplicon overlaps with an amplicon that was previously
359-
# found and had a high enough deltaG
360-
if amp["deltaG"] <= deltaG_cutoff:
361-
continue
362-
amp_range = range(amp["LEFT"][1], amp["RIGHT"][2])
363-
overlaps_retained = False
364-
for r in retained_ranges:
365-
if amp_range.start < r.stop and r.start < amp_range.stop:
366-
overlaps_retained = True
367-
break
368-
if not overlaps_retained:
369-
final_amplicons.append(amp)
370-
retained_ranges.append(amp_range)
355+
results = pool.map(callable_f, amplicons, chunksize=batch_size)
356+
# Process the results
357+
retained_ranges = []
358+
for amp in results:
359+
# check if the amplicon overlaps with an amplicon that was previously
360+
# found and had a high enough deltaG
361+
if amp["deltaG"] <= deltaG_cutoff:
362+
continue
363+
amp_range = range(amp["LEFT"][1], amp["RIGHT"][2])
364+
overlaps_retained = False
365+
for r in retained_ranges:
366+
if amp_range.start < r.stop and r.start < amp_range.stop:
367+
overlaps_retained = True
368+
break
369+
if not overlaps_retained:
370+
final_amplicons.append(amp)
371+
retained_ranges.append(amp_range)
371372

372373
return final_amplicons

varvamp/scripts/scheme.py

Lines changed: 5 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -299,7 +299,10 @@ def test_scheme_for_dimers(amplicon_scheme):
299299

300300
primer_dimers = []
301301
non_dimers = {amp["pool"]:set() for amp in amplicon_scheme}
302-
# write all primer sequences in the respective pools
302+
# write all primer sequences in the respective pools -->
303+
# these primers should not be violated by primer switching
304+
# and primers are only switched later if no primer dimers
305+
# with the existing 'good' scheme are created
303306
for amp in amplicon_scheme:
304307
non_dimers[amp["pool"]].add(amp["LEFT"][0])
305308
non_dimers[amp["pool"]].add(amp["RIGHT"][0])
@@ -324,6 +327,7 @@ def test_scheme_for_dimers(amplicon_scheme):
324327
# and remember all tested primers
325328
tested_primers.append(current_primer)
326329

330+
# report both dimers and non-dimers
327331
return primer_dimers, non_dimers
328332

329333

0 commit comments

Comments
 (0)