1414__authors__ = Marco Reverenna
1515__copyright__ = Copyright 2025-2026
1616__research-group__ = DTU Biosustain (Multi-omics Network Analytics) and DTU Bioengineering
17- __date__ = 03 Nov 2025
17+ __date__ = 14 Nov 2025
1818__maintainer__ = Marco Reverenna
19192020__status__ = Dev
2828from pathlib import Path
2929from Bio import SeqIO
3030
# Configure root logging once at import time so every message from this
# module carries a timestamp and severity level.
logging.basicConfig(level=logging.INFO, format="%(asctime)s [%(levelname)s] %(message)s")
logger = logging.getLogger(__name__)
3433
@@ -47,10 +46,10 @@ def align_or_copy_fasta(fasta_file, output_file):
4746 if len (sequences ) == 1 :
4847 # Only one sequence, no alignment needed
4948 shutil .copy (fasta_file , output_file )
50- logger .info (f"Copied single-sequence file: { Path (fasta_file ).name } " )
49+ logger .debug (f"Copied single-sequence file: { Path (fasta_file ).name } " )
5150 elif len (sequences ) > 1 :
5251 # Multiple sequences, run clustalo
53- logger .info (f"Aligning { len (sequences )} sequences from { Path (fasta_file ).name } ..." )
52+ logger .debug (f"Aligning { len (sequences )} sequences from { Path (fasta_file ).name } ..." )
5453 try :
5554 subprocess .run (
5655 ["clustalo" , "-i" , fasta_file , "-o" , output_file , "--outfmt" , "fa" , "--force" ],
@@ -69,18 +68,16 @@ def align_or_copy_fasta(fasta_file, output_file):
6968 logger .warning (f"Skipping empty FASTA file: { Path (fasta_file ).name } " )
7069
7170
72- def process_alignment (scaffolds_folder : str ):
71+ def process_alignment (input_dir : str , output_dir : str ):
7372 """
74- Align all FASTA files in .../scaffolds/clustering/cluster_fasta
75- and save results in .../scaffolds/alignment/.
73+ Align all FASTA files from input_dir and save results in output_dir.
7674
7775 Args:
78- scaffolds_folder (str): Path to the .../scaffolds/ directory.
76+ input_dir (str): Path to the .../cluster_fasta/ directory.
77+ output_dir (str): Path to the .../alignment/ directory.
7978 """
80- scaffolds_folder_path = Path (scaffolds_folder )
81- clustering_dir = scaffolds_folder_path / "clustering"
82- cluster_fasta_folder = clustering_dir / "cluster_fasta"
83- alignment_folder = scaffolds_folder_path / "alignment"
79+ cluster_fasta_folder = Path (input_dir )
80+ alignment_folder = Path (output_dir )
8481
8582 alignment_folder .mkdir (parents = True , exist_ok = True )
8683
@@ -105,23 +102,20 @@ def process_alignment(scaffolds_folder: str):
105102 logger .info ("All alignment tasks completed." )
106103
107104
def main(input_cluster_fasta_folder: str,
         output_alignment_folder: str) -> None:
    """
    Main function to run the alignment script.

    Logs the start of the step and the two folders, delegates the actual
    work to process_alignment(), then logs completion.

    Args:
        input_cluster_fasta_folder (str): Path to the folder containing the
            cluster FASTA files (e.g., .../cluster_fasta).
        output_alignment_folder (str): Path to the folder where the
            alignments are saved (e.g., .../alignment).
    """
    logger.info("--- Starting Step 4: Alignment ---")

    logger.info(f"Input Folder (cluster FASTA): {input_cluster_fasta_folder}")
    logger.info(f"Output Folder (Alignments): {output_alignment_folder}")

    # Folder paths are passed through unchanged as explicit input/output
    # directories.
    process_alignment(
        input_dir=input_cluster_fasta_folder,
        output_dir=output_alignment_folder,
    )

    logger.info("--- Step 4: Alignment Completed ---")
127121
@@ -135,16 +129,28 @@ def cli():
135129 )
136130
137131 parser .add_argument (
138- "--combo-folder" ,
132+ "--input-folder" ,
133+ type = str ,
134+ required = True ,
135+ help = "Path to the folder containing cluster FASTA files (e.g., .../cluster_fasta)."
136+ )
137+ parser .add_argument (
138+ "--output-folder" ,
139139 type = str ,
140140 required = True ,
141- help = "Path to the 'comb_...' folder (output of the assembly step )."
141+ help = "Path to the folder to save aligned .afa files (e.g., .../alignment )."
142142 )
143143
144144 args = parser .parse_args ()
145-
146- main (combo_folder = args .combo_folder )
145+
146+ main (input_cluster_fasta_folder = args .input_folder ,
147+ output_alignment_folder = args .output_folder
148+ )
147149
148150
# Run the command-line interface only when executed as a script/module,
# not when imported.
if __name__ == "__main__":
    cli()

# Example invocation:
# python -m instanexus.alignment \
#     --input-folder outputs/bsa/scaffolds/clustering/cluster_fasta \
#     --output-folder outputs/bsa/scaffolds/alignment
0 commit comments