@@ -341,60 +341,6 @@ def filter_kmer_direction_dependend(direction, kmer, ambiguous_consensus):
341341 )
342342
343343
def parse_primer_fasta(fasta_path):
    """
    Read primers from a FASTA file via BioPython and return them as one list.

    Each record is lower-cased, records longer than 40 nucleotides are
    dropped, and every remaining sequence is handed to
    reporting.get_permutations. The per-record results are concatenated in
    file order.
    """
    lowered_records = (
        str(entry.seq).lower() for entry in SeqIO.parse(fasta_path, "fasta")
    )
    # Primers above 40 nucleotides are ignored.
    permutation_groups = [
        reporting.get_permutations(candidate)
        for candidate in lowered_records
        if len(candidate) <= 40
    ]
    return list(chain.from_iterable(permutation_groups))
358-
359-
def check_primer_against_externals(args):
    """
    Pool worker: test one primer against every external sequence.

    args is a (primer, external_sequences) pair; the first item of the
    primer is the value passed to is_dimer. The primer is handed back
    unchanged when no external sequence is reported as a dimer partner,
    otherwise None is returned.
    """
    primer, external_sequences = args

    # any() stops at the first dimer hit, just like an early return would.
    forms_dimer = any(
        is_dimer(primer[0], external) for external in external_sequences
    )
    return None if forms_dimer else primer
373-
374-
def filter_non_dimer_candidates(primer_candidates, external_sequences, n_threads):
    """
    Keep only the primer candidates that do not dimerise with any external
    sequence.

    The candidates are checked in a multiprocessing pool with n_threads
    worker processes; the returned list keeps the candidates in their
    original order.
    """
    # dict.fromkeys drops repeated sequences while keeping first-seen order,
    # so no external sequence is checked twice.
    deduplicated = list(dict.fromkeys(external_sequences))

    with multiprocessing.Pool(processes=n_threads) as pool:
        # One task per candidate, each paired with the full external set.
        tasks = [(candidate, deduplicated) for candidate in primer_candidates]
        outcomes = pool.map(check_primer_against_externals, tasks)

    # Workers signal a rejected candidate with None.
    return [outcome for outcome in outcomes if outcome is not None]
396-
397-
398344def _process_kmer_batch (args ):
399345 """
400346 Helper function for multiprocessing: process a batch of kmers.
@@ -528,3 +474,68 @@ def find_best_primers(left_primer_candidates, right_primer_candidates, high_cons
528474
529475 # and create a dict
530476 return all_primers
477+
478+
def parse_primer_fasta(fasta_path, max_length=40):
    """
    Parse a primer FASTA file with BioPython and return the expanded sequences.

    Every record is lower-cased; records longer than max_length are skipped
    silently. Each remaining sequence is passed to reporting.get_permutations
    and the results are concatenated into a single flat list.

    Args:
        fasta_path: FASTA source handed unchanged to SeqIO.parse.
        max_length: longest primer (in nucleotides) that is still included.
            Defaults to 40, the limit that used to be hard-coded.

    Returns:
        A flat list holding the items produced by reporting.get_permutations
        for every accepted record, in file order.
    """
    sequences = []

    for record in SeqIO.parse(fasta_path, "fasta"):
        seq = str(record.seq).lower()
        # Over-long entries are dropped rather than reported.
        if len(seq) <= max_length:
            # NOTE(review): get_permutations presumably expands ambiguous
            # bases into concrete sequences - confirm in reporting.
            sequences.extend(reporting.get_permutations(seq))

    return sequences
493+
494+
def check_primer_against_externals(args):
    """
    Worker function to check a single primer against all external sequences.

    Args:
        args: a (primer, external_sequences) pair. The primer is either
            - a (name, data) 2-tuple, as produced from dict items, where
              data[0] is the sequence, or
            - any other indexable record whose first item is the sequence.

    Returns:
        The primer exactly as it was passed in if is_dimer reports no hit
        against any external sequence, None otherwise.
    """
    primer, external_sequences = args

    # Only a 2-tuple is treated as (name, data). Tuples of any other length
    # are list-style records and must not be unpacked as a name/data pair
    # (doing so raised ValueError).
    if isinstance(primer, tuple) and len(primer) == 2:
        seq = primer[1][0]
    else:
        seq = primer[0]

    # any() short-circuits on the first external sequence that forms a dimer.
    if any(is_dimer(seq, ext_seq) for ext_seq in external_sequences):
        return None

    return primer
515+
516+
def filter_non_dimer_candidates(primer_candidates, external_sequences, n_threads):
    """
    Filter out primer candidates that form dimers with external sequences.

    Args:
        primer_candidates: either a dict mapping a name to a primer record,
            or an iterable of primer records. In both cases the first item
            of a record is used as the sequence by the worker.
        external_sequences: hashable sequences to test every candidate
            against; duplicates are checked only once.
        n_threads: number of worker processes for the multiprocessing pool.

    Returns:
        A dict if a dict was passed in, otherwise a list, containing only the
        candidates for which the worker found no dimer. Input order is kept.
        When the pool is used the returned records are the copies sent back
        by the worker processes.
    """
    is_dict = isinstance(primer_candidates, dict)

    # dict.fromkeys de-duplicates while keeping first-seen order, so the
    # order in which externals are checked is the same on every run
    # (list(set(...)) is not stable for strings across interpreter runs).
    unique_sequences = list(dict.fromkeys(external_sequences))

    # Dict input travels through the pool as (name, data) pairs.
    if is_dict:
        candidates = list(primer_candidates.items())
    else:
        candidates = list(primer_candidates)

    if not candidates or not unique_sequences:
        # Nothing to test or nothing to test against: every candidate passes,
        # so do not pay for starting worker processes.
        survivors = candidates
    else:
        tasks = [(candidate, unique_sequences) for candidate in candidates]
        with multiprocessing.Pool(processes=n_threads) as pool:
            results = pool.map(check_primer_against_externals, tasks)
        # Workers return None for candidates that formed a dimer.
        survivors = [result for result in results if result is not None]

    # Restore the container type the caller passed in.
    return dict(survivors) if is_dict else survivors
0 commit comments