Skip to content

Commit 549d48b

Browse files
committed
finalized review from wm75
1 parent 4fb8226 commit 549d48b

File tree

5 files changed

+86
-57
lines changed

5 files changed

+86
-57
lines changed

varvamp/command.py

Lines changed: 13 additions & 8 deletions
Original file line numberDiff line numberDiff line change
@@ -195,6 +195,11 @@ def shared_workflow(args, log_file):
195195
# read in external primer sequences with which new primers should not form dimers
196196
if args.compatible_primers is not None:
197197
compatible_primers = primers.parse_primer_fasta(args.compatible_primers)
198+
if not compatible_primers:
199+
logging.raise_error(
200+
"no valid primers found in the provided primer file.\n",
201+
log_file,
202+
)
198203
else:
199204
compatible_primers = None
200205
# check alignment length and number of gaps and report if its significantly more/less than expected
@@ -304,7 +309,7 @@ def shared_workflow(args, log_file):
304309
)
305310

306311
# filter primers against user-provided list of compatible primers, can use multi-processing
307-
if compatible_primers is not None:
312+
if compatible_primers:
308313
left_primer_candidates = primers.filter_non_dimer_candidates(
309314
left_primer_candidates, compatible_primers, args.threads
310315
)
@@ -394,7 +399,7 @@ def single_workflow(args, amplicons, log_file):
394399
return amplicon_scheme
395400

396401

397-
def tiled_workflow(args, amplicons, left_primer_candidates, right_primer_candidates, all_primers, ambiguous_consensus, log_file, results_dir):
402+
def tiled_workflow(args, amplicons, left_primer_candidates, right_primer_candidates, all_primers, ambiguous_consensus, log_file):
398403
"""
399404
part of the workflow specific for the tiled mode
400405
"""
@@ -452,9 +457,8 @@ def tiled_workflow(args, amplicons, left_primer_candidates, right_primer_candida
452457
job="Trying to solve primer dimers.",
453458
progress_text=f"{len(dimers_not_solved)}/{n_initial_dimers} dimers could not be resolved"
454459
)
455-
reporting.write_dimers(results_dir, dimers_not_solved)
456460

457-
return amplicon_scheme
461+
return amplicon_scheme, dimers_not_solved
458462

459463

460464
def qpcr_workflow(args, data_dir, alignment_cleaned, ambiguous_consensus, majority_consensus, left_primer_candidates, right_primer_candidates, compatible_primers, log_file):
@@ -495,7 +499,7 @@ def qpcr_workflow(args, data_dir, alignment_cleaned, ambiguous_consensus, majori
495499
)
496500

497501
# filter primers against non-dimer sequences if provided
498-
if compatible_primers is not None:
502+
if compatible_primers:
499503
qpcr_probes = primers.filter_non_dimer_candidates(
500504
qpcr_probes, compatible_primers, args.threads)
501505
logging.varvamp_progress(
@@ -591,6 +595,7 @@ def main():
591595

592596
# SINGLE/TILED mode
593597
if args.mode == "tiled" or args.mode == "single":
598+
dimers_not_solved = None
594599
all_primers, amplicons = single_and_tiled_shared_workflow(
595600
args,
596601
left_primer_candidates,
@@ -606,15 +611,14 @@ def main():
606611
log_file
607612
)
608613
elif args.mode == "tiled":
609-
amplicon_scheme = tiled_workflow(
614+
amplicon_scheme, dimers_not_solved = tiled_workflow(
610615
args,
611616
amplicons,
612617
left_primer_candidates,
613618
right_primer_candidates,
614619
all_primers,
615620
ambiguous_consensus,
616621
log_file,
617-
results_dir
618622
)
619623

620624
# write files
@@ -631,7 +635,8 @@ def main():
631635
ambiguous_consensus,
632636
args.name,
633637
args.mode,
634-
log_file
638+
log_file,
639+
dimers_not_solved
635640
)
636641
reporting.varvamp_plot(
637642
results_dir,

varvamp/scripts/primers.py

Lines changed: 20 additions & 13 deletions
Original file line numberDiff line numberDiff line change
@@ -6,6 +6,7 @@
66
import itertools
77
import re
88
import multiprocessing
9+
import functools
910

1011
# LIBS
1112
from Bio.Seq import Seq
@@ -341,12 +342,11 @@ def filter_kmer_direction_dependend(direction, kmer, ambiguous_consensus):
341342
)
342343

343344

344-
def _process_kmer_batch(args):
345+
def _process_kmer_batch(ambiguous_consensus, alignment, kmers):
345346
"""
346347
Helper function for multiprocessing: process a batch of kmers.
347348
Returns (left_primers, right_primers) tuples.
348349
"""
349-
kmers, ambiguous_consensus, alignment = args
350350
left_primers = []
351351
right_primers = []
352352

@@ -387,11 +387,16 @@ def find_primers(kmers, ambiguous_consensus, alignment, num_processes):
387387

388388
# Split kmers into batches
389389
batches = [kmers[i:i + batch_size] for i in range(0, len(kmers), batch_size)]
390-
args_list = [(batch, ambiguous_consensus, alignment) for batch in batches]
391390

392-
# Process batches in parallel
391+
# Prepare arguments for each dimer
392+
callable_f = functools.partial(
393+
_process_kmer_batch,
394+
ambiguous_consensus, alignment
395+
)
396+
397+
# Solve dimers in parallel
393398
with multiprocessing.Pool(processes=num_processes) as pool:
394-
results = pool.map(_process_kmer_batch, args_list)
399+
results = pool.map(callable_f, batches)
395400

396401
# Aggregate results
397402
left_primer_candidates = []
@@ -502,13 +507,12 @@ def parse_primer_fasta(fasta_path):
502507
return list(set(sequences)) # deduplication
503508

504509

505-
def check_primer_against_externals(args):
510+
def check_primer_against_externals(external_sequences, primer):
506511
"""
507512
Worker function to check a single primer against all external sequences.
508513
Returns the primer if it passes, None otherwise.
509514
Handles both list format and dict format (name, data) tuples.
510515
"""
511-
primer, external_sequences = args
512516

513517
# Extract sequence based on input format
514518
if isinstance(primer, tuple):
@@ -524,21 +528,24 @@ def check_primer_against_externals(args):
524528
return primer
525529

526530

527-
def filter_non_dimer_candidates(primer_candidates, external_sequences, n_threads):
531+
def filter_non_dimer_candidates(primer_candidates, external_sequences, n_processes):
528532
"""
529533
Filter out primer candidates that form dimers with external sequences.
530534
Uses multiprocessing to speed up checks.
531535
"""
532536
is_dict = isinstance(primer_candidates, dict)
533537

534-
with multiprocessing.Pool(processes=n_threads) as pool:
538+
callable_f = functools.partial(
539+
check_primer_against_externals,
540+
external_sequences
541+
)
542+
543+
with multiprocessing.Pool(processes=n_processes) as pool:
535544
# Prepare arguments based on input type
536545
if is_dict:
537-
args = [((name, data), external_sequences) for name, data in primer_candidates.items()]
546+
results = pool.map(callable_f, primer_candidates.items())
538547
else:
539-
args = [(primer, external_sequences) for primer in primer_candidates]
540-
541-
results = pool.map(check_primer_against_externals, args)
548+
results = pool.map(callable_f, primer_candidates)
542549

543550
# Filter and restore original format
544551
if is_dict:

varvamp/scripts/qpcr.py

Lines changed: 24 additions & 18 deletions
Original file line numberDiff line numberDiff line change
@@ -7,11 +7,11 @@
77
import seqfold
88
import itertools
99
import multiprocessing
10+
import functools
1011

1112
# varVAMP
1213
from varvamp.scripts import config
1314
from varvamp.scripts import primers
14-
from varvamp.scripts import reporting
1515

1616

1717
def choose_probe_direction(seq):
@@ -51,12 +51,11 @@ def filter_probe_direction_dependent(seq):
5151
)
5252

5353

54-
def _process_kmer_batch_probes(args):
54+
def _process_kmer_batch_probes(ambiguous_consensus, alignment_cleaned, kmers):
5555
"""
5656
Helper function for multiprocessing: process a batch of kmers for probes.
5757
Returns probe_candidates dictionary.
5858
"""
59-
kmers, ambiguous_consensus, alignment_cleaned = args
6059
probe_candidates = {}
6160
probe_idx = 0
6261

@@ -91,7 +90,7 @@ def _process_kmer_batch_probes(args):
9190
return probe_candidates
9291

9392

94-
def get_qpcr_probes(kmers, ambiguous_consensus, alignment_cleaned, num_processes, batch_size=1000):
93+
def get_qpcr_probes(kmers, ambiguous_consensus, alignment_cleaned, num_processes):
9594
"""
9695
Find potential qPCR probes using multiprocessing.
9796
"""
@@ -100,12 +99,16 @@ def get_qpcr_probes(kmers, ambiguous_consensus, alignment_cleaned, num_processes
10099
kmers = list(kmers)
101100

102101
# Split kmers into batches
102+
batch_size = int(len(kmers) / num_processes)
103103
batches = [kmers[i:i + batch_size] for i in range(0, len(kmers), batch_size)]
104-
args_list = [(batch, ambiguous_consensus, alignment_cleaned) for batch in batches]
105104

106-
# Process batches in parallel
105+
# Prepare arguments for each dimer
106+
callable_f = functools.partial(
107+
_process_kmer_batch_probes,
108+
ambiguous_consensus, alignment_cleaned
109+
)
107110
with multiprocessing.Pool(processes=num_processes) as pool:
108-
results = pool.map(_process_kmer_batch_probes, args_list)
111+
results = pool.map(callable_f, batches)
109112

110113
# Aggregate results and re-index probe names
111114
probe_candidates = {}
@@ -245,11 +248,14 @@ def assess_amplicons(left_subset, right_subset, qpcr_probes, probe, majority_con
245248
return primer_combinations
246249

247250

248-
def find_single_qpcr_scheme(probe_name, probe_data, left_primer_candidates, right_primer_candidates, qpcr_probes,
249-
majority_consensus, ambiguous_consensus):
251+
def find_single_qpcr_scheme(left_primer_candidates, right_primer_candidates, qpcr_probes,
252+
majority_consensus, ambiguous_consensus, probe):
250253
"""
251254
Find a qPCR scheme for a single probe.
252255
"""
256+
257+
probe_name, probe_data = probe
258+
253259
# Generate flanking subsets within the worker process
254260
left_subset = flanking_primer_subset(left_primer_candidates, "+", probe_data)
255261
right_subset = flanking_primer_subset(right_primer_candidates, "-", probe_data)
@@ -266,7 +272,7 @@ def find_single_qpcr_scheme(probe_name, probe_data, left_primer_candidates, righ
266272

267273

268274
def find_qcr_schemes(qpcr_probes, left_primer_candidates, right_primer_candidates,
269-
majority_consensus, ambiguous_consensus, num_processes, batch_size=100):
275+
majority_consensus, ambiguous_consensus, num_processes):
270276
"""
271277
Find final qPCR schemes using multiprocessing to evaluate probes in parallel.
272278
Probes are sorted by penalty, ensuring optimal probe selection.
@@ -276,15 +282,15 @@ def find_qcr_schemes(qpcr_probes, left_primer_candidates, right_primer_candidate
276282
amplicon_nr = -1
277283

278284
# Prepare arguments for parallel processing - pass full primer lists
279-
args_list = [
280-
(probe_name, probe_data, left_primer_candidates, right_primer_candidates,
281-
qpcr_probes, majority_consensus, ambiguous_consensus)
282-
for probe_name, probe_data in qpcr_probes.items()
283-
]
285+
batch_size = int(len(qpcr_probes) / num_processes)
286+
callable_f = functools.partial(
287+
find_single_qpcr_scheme,
288+
left_primer_candidates, right_primer_candidates, qpcr_probes, majority_consensus, ambiguous_consensus
289+
)
284290

285291
# Process probes in parallel
286292
with multiprocessing.Pool(processes=num_processes) as pool:
287-
results = pool.map(find_single_qpcr_scheme, args_list, chunksize=batch_size)
293+
results = pool.map(callable_f, qpcr_probes.items(), chunksize=batch_size)
288294

289295
# Aggregate results in original probe order (sorted by penalty)
290296
for probe_name, primer_combination in results:
@@ -327,7 +333,7 @@ def process_single_amplicon_deltaG(amplicon, majority_consensus):
327333
return amplicon
328334

329335

330-
def test_amplicon_deltaG_parallel(qpcr_schemes_candidates, majority_consensus, n_to_test, deltaG_cutoff, n_threads):
336+
def test_amplicon_deltaG_parallel(qpcr_schemes_candidates, majority_consensus, n_to_test, deltaG_cutoff, n_processes):
331337
"""
332338
Test all amplicon deltaGs for the top n hits at the lowest primer temperature
333339
and filters if they fall below the cutoff. Multiple processes are used
@@ -336,7 +342,7 @@ def test_amplicon_deltaG_parallel(qpcr_schemes_candidates, majority_consensus, n
336342
final_amplicons = []
337343

338344
# Create a pool of processes to handle the concurrent processing
339-
with multiprocessing.Pool(processes=n_threads) as pool:
345+
with multiprocessing.Pool(processes=n_processes) as pool:
340346
# Create a list of the first n amplicon tuples for processing
341347
# The list is sorted first on whether offset targets were predicted for the amplicon,
342348
# then by penalty. This ensures that amplicons with offset targets are always considered last

varvamp/scripts/reporting.py

Lines changed: 20 additions & 9 deletions
Original file line numberDiff line numberDiff line change
@@ -209,7 +209,7 @@ def write_qpcr_to_files(path, final_schemes, ambiguous_consensus, scheme_name, l
209209
print(f">{primer_name}\n{seq.upper()}", file=fasta)
210210

211211

212-
def write_scheme_to_files(path, amplicon_scheme, ambiguous_consensus, scheme_name, mode, log_file):
212+
def write_scheme_to_files(path, amplicon_scheme, ambiguous_consensus, scheme_name, mode, log_file, primer_dimers=None):
213213
"""
214214
write all relevant bed files and a tsv file with all primer stats
215215
"""
@@ -218,6 +218,9 @@ def write_scheme_to_files(path, amplicon_scheme, ambiguous_consensus, scheme_nam
218218
amplicon_bed_file = os.path.join(path, "amplicons.bed")
219219
tabular_file = os.path.join(path, "primer_to_amplicon_assignment.tabular")
220220

221+
# Map old primer names to new amplicon-based names
222+
name_mapping = {}
223+
221224
# open files to write
222225
with open(tsv_file, "w") as tsv, open(amplicon_bed_file, "w") as bed, open(tabular_file, "w") as tabular:
223226
# write header for primer tsv
@@ -233,11 +236,11 @@ def write_scheme_to_files(path, amplicon_scheme, ambiguous_consensus, scheme_nam
233236
if mode == "single":
234237
primer_fasta_file = os.path.join(path, "primers.fasta")
235238
else:
236-
primer_fasta_file = os.path.join(path, f"primers_pool_{pool+1}.fasta")
239+
primer_fasta_file = os.path.join(path, f"primers_pool_{pool + 1}.fasta")
237240
with open(primer_fasta_file, "w") as primer_fasta:
238241
for counter, amp in enumerate(amplicon_scheme[pool::len(pools)]):
239242
# give a new amplicon name
240-
amplicon_index = counter*len(pools) + pool
243+
amplicon_index = counter * len(pools) + pool
241244
amp_name = f"{scheme_name}_{amplicon_index}"
242245
# get left and right primers and their names
243246
amp_length = amp["RIGHT"][2] - amp["LEFT"][1]
@@ -251,7 +254,7 @@ def write_scheme_to_files(path, amplicon_scheme, ambiguous_consensus, scheme_nam
251254
amplicon_has_off_target = "n.d."
252255
# write amplicon bed
253256
if mode == "tiled":
254-
bed_score = pool+1
257+
bed_score = pool + 1
255258
elif mode == "single":
256259
bed_score = round(amp["LEFT"][3] + amp["RIGHT"][3], 1)
257260
amplicon_bed_records.append(
@@ -269,6 +272,10 @@ def write_scheme_to_files(path, amplicon_scheme, ambiguous_consensus, scheme_nam
269272
(f"{amp_name}_LEFT", f"{amp_name}_RIGHT")
270273
)
271274
)
275+
# Build name mapping for dimers
276+
name_mapping[amp["LEFT"][-1]] = f"{amp_name}_LEFT"
277+
name_mapping[amp["RIGHT"][-1]] = f"{amp_name}_RIGHT"
278+
272279
# write primer tsv and primer bed
273280
for direction, primer in [("+", amp["LEFT"]), ("-", amp["RIGHT"])]:
274281
seq = ambiguous_consensus[primer[1]:primer[2]]
@@ -288,7 +295,7 @@ def write_scheme_to_files(path, amplicon_scheme, ambiguous_consensus, scheme_nam
288295
amp_length,
289296
primer_name,
290297
primer[-1],
291-
pool+1,
298+
pool + 1,
292299
primer[1] + 1,
293300
primer[2],
294301
seq.upper(),
@@ -306,7 +313,7 @@ def write_scheme_to_files(path, amplicon_scheme, ambiguous_consensus, scheme_nam
306313
(
307314
# will need amplicon_index for sorting
308315
amplicon_index,
309-
(primer_name, primer, pool+1, direction, seq.upper())
316+
(primer_name, primer, pool + 1, direction, seq.upper())
310317
)
311318
)
312319
# write amplicon bed with amplicons sorted by start position
@@ -333,8 +340,12 @@ def write_scheme_to_files(path, amplicon_scheme, ambiguous_consensus, scheme_nam
333340
*record[1]
334341
)
335342

343+
# Write dimers with renamed primers
344+
if primer_dimers:
345+
write_dimers(path, primer_dimers, name_mapping)
346+
336347

337-
def write_dimers(path, primer_dimers):
348+
def write_dimers(path, primer_dimers, name_mapping):
338349
"""
339350
write dimers for which no replacement was found to file
340351
"""
@@ -348,8 +359,8 @@ def write_dimers(path, primer_dimers):
348359
)
349360
print(
350361
pool+1,
351-
primer1[2][0],
352-
primer2[2][0],
362+
name_mapping[primer1[1]],
363+
name_mapping[primer2[1]],
353364
round(dimer_result.tm, 1),
354365
dimer_result.dg,
355366
sep="\t",

0 commit comments

Comments
 (0)