Skip to content

Commit 5c4511f

Browse files
🐛 Refactored the code
1 parent 0b1ea28 commit 5c4511f

File tree

6 files changed

+304
-561
lines changed

6 files changed

+304
-561
lines changed

src/instanexus/alignment.py

Lines changed: 34 additions & 28 deletions
Original file line numberDiff line numberDiff line change
@@ -14,7 +14,7 @@
1414
__authors__ = Marco Reverenna
1515
__copyright__ = Copyright 2025-2026
1616
__research-group__ = DTU Biosustain (Multi-omics Network Analytics) and DTU Bioengineering
17-
__date__ = 03 Nov 2025
17+
__date__ = 14 Nov 2025
1818
__maintainer__ = Marco Reverenna
1919
__email__ = [email protected]
2020
__status__ = Dev
@@ -28,7 +28,6 @@
2828
from pathlib import Path
2929
from Bio import SeqIO
3030

31-
# Setup logging
3231
logging.basicConfig(level=logging.INFO, format="%(asctime)s [%(levelname)s] %(message)s")
3332
logger = logging.getLogger(__name__)
3433

@@ -47,10 +46,10 @@ def align_or_copy_fasta(fasta_file, output_file):
4746
if len(sequences) == 1:
4847
# Only one sequence, no alignment needed
4948
shutil.copy(fasta_file, output_file)
50-
logger.info(f"Copied single-sequence file: {Path(fasta_file).name}")
49+
logger.debug(f"Copied single-sequence file: {Path(fasta_file).name}")
5150
elif len(sequences) > 1:
5251
# Multiple sequences, run clustalo
53-
logger.info(f"Aligning {len(sequences)} sequences from {Path(fasta_file).name}...")
52+
logger.debug(f"Aligning {len(sequences)} sequences from {Path(fasta_file).name}...")
5453
try:
5554
subprocess.run(
5655
["clustalo", "-i", fasta_file, "-o", output_file, "--outfmt", "fa", "--force"],
@@ -69,18 +68,16 @@ def align_or_copy_fasta(fasta_file, output_file):
6968
logger.warning(f"Skipping empty FASTA file: {Path(fasta_file).name}")
7069

7170

72-
def process_alignment(scaffolds_folder: str):
71+
def process_alignment(input_dir: str, output_dir: str):
7372
"""
74-
Align all FASTA files in .../scaffolds/clustering/cluster_fasta
75-
and save results in .../scaffolds/alignment/.
73+
Align all FASTA files from input_dir and save results in output_dir.
7674
7775
Args:
78-
scaffolds_folder (str): Path to the .../scaffolds/ directory.
76+
input_dir (str): Path to the .../cluster_fasta/ directory.
77+
output_dir (str): Path to the .../alignment/ directory.
7978
"""
80-
scaffolds_folder_path = Path(scaffolds_folder)
81-
clustering_dir = scaffolds_folder_path / "clustering"
82-
cluster_fasta_folder = clustering_dir / "cluster_fasta"
83-
alignment_folder = scaffolds_folder_path / "alignment"
79+
cluster_fasta_folder = Path(input_dir)
80+
alignment_folder = Path(output_dir)
8481

8582
alignment_folder.mkdir(parents=True, exist_ok=True)
8683

@@ -105,23 +102,20 @@ def process_alignment(scaffolds_folder: str):
105102
logger.info("All alignment tasks completed.")
106103

107104

108-
def main(combo_folder: str):
105+
def main(input_cluster_fasta_folder: str,
106+
output_alignment_folder: str):
109107
"""
110108
Main function to run the alignment script.
111109
"""
112110
logger.info("--- Starting Step 4: Alignment ---")
113-
114-
combo_folder_path = Path(combo_folder)
115-
scaffolds_folder_path = combo_folder_path / "scaffolds"
116111

117-
if not scaffolds_folder_path.exists():
118-
logger.error(f"Scaffolds folder not found: {scaffolds_folder_path}")
119-
raise FileNotFoundError(f"Scaffolds folder not found: {scaffolds_folder_path}")
112+
logger.info(f"Input Folder (cluster FASTA): {input_cluster_fasta_folder}")
113+
logger.info(f"Output Folder (Alignments): {output_alignment_folder}")
120114

121-
logger.info(f"Input (Scaffolds Folder): {scaffolds_folder_path}")
122-
123-
# Call the core logic function, passing the .../scaffolds/ path
124-
process_alignment(scaffolds_folder=str(scaffolds_folder_path))
115+
process_alignment(
116+
input_dir=input_cluster_fasta_folder,
117+
output_dir=output_alignment_folder
118+
)
125119

126120
logger.info("--- Step 4: Alignment Completed ---")
127121

@@ -135,16 +129,28 @@ def cli():
135129
)
136130

137131
parser.add_argument(
138-
"--combo-folder",
132+
"--input-folder",
133+
type=str,
134+
required=True,
135+
help="Path to the folder containing cluster FASTA files (e.g., .../cluster_fasta)."
136+
)
137+
parser.add_argument(
138+
"--output-folder",
139139
type=str,
140140
required=True,
141-
help="Path to the 'comb_...' folder (output of the assembly step)."
141+
help="Path to the folder to save aligned .afa files (e.g., .../alignment)."
142142
)
143143

144144
args = parser.parse_args()
145-
146-
main(combo_folder=args.combo_folder)
145+
146+
main(input_cluster_fasta_folder=args.input_folder,
147+
output_alignment_folder=args.output_folder
148+
)
147149

148150

149151
if __name__ == "__main__":
150-
cli()
152+
cli()
153+
154+
# python -m instanexus.alignment \
155+
# --input-folder outputs/bsa/scaffolds/clustering/cluster_fasta \
156+
# --output-folder outputs/bsa/scaffolds/alignment

0 commit comments

Comments
 (0)