Skip to content

Commit 9ee72e3

Browse files
Improved logging
1 parent f0aa0c0 commit 9ee72e3

File tree

1 file changed

+12
-14
lines changed

1 file changed

+12
-14
lines changed

reform.py

Lines changed: 12 additions & 14 deletions
Original file line number | Diff line number | Diff line change
@@ -10,7 +10,6 @@
1010
## Importing gzip or pgzip module for file compression
1111
print("------------------------------------------")
1212
print(f"Compression Library Use:")
13-
print("------------------------------------------")
1413
try:
1514
import pgzip as gzip_module
1615
print(f"Using pgzip for gzip operations.")
@@ -25,8 +24,7 @@ def main():
2524

2625
## Print reference file paths at start of process
2726
print("------------------------------------------")
28-
print(f"Path of Reference Files:")
29-
print("------------------------------------------")
27+
print(f"Path to Reference Files:")
3028
print(f"Reference FASTA: {os.path.realpath(in_arg.ref_fasta)}")
3129
print(f"Reference Annotation: {os.path.realpath(in_arg.ref_gff)}")
3230

@@ -43,24 +41,24 @@ def main():
4341
if hasattr(in_arg, 'chrom') and in_arg.chrom is not None:
4442
## Modify existing chrom seq
4543
print("-------------------------------------------")
46-
print(f"Begin modification from in{index+1}.fa")
44+
print(f"Begin modification from {in_arg.in_fasta[index]}")
4745
print("-------------------------------------------")
4846
new_fasta, annotation_ext, new_gff_path, prev_fasta_path, prev_gff_path = \
4947
modify_existing_chrom_seq(in_arg, index, prev_fasta_path, prev_modifications, \
5048
iterations, prev_gff_path)
5149
else:
5250
## Add new chrom seq
5351
print("-------------------------------------------")
54-
print(f"Begin adding a new chromosome from in{index+1}.fa")
52+
print(f"Begin adding a new chromosome from {in_arg.in_fasta[index]}")
5553
print("-------------------------------------------")
5654
new_fasta, annotation_ext, new_gff_path, prev_fasta_path, prev_gff_path = \
5755
add_new_chrom_seq(in_arg, index, prev_fasta_path, prev_gff_path, iterations)
5856

5957
print("------------------------------------------")
6058
print(f"Reform Complete")
61-
print("------------------------------------------")
6259
print(f"New .fa file created: {os.path.realpath(new_fasta)}")
6360
print(f"New {annotation_ext} file created: {os.path.realpath(new_gff_path)}")
61+
print("------------------------------------------")
6462

6563
def modify_existing_chrom_seq(in_arg, index, prev_fasta_path, prev_modifications, iterations, prev_gff_path):
6664
"""
@@ -87,7 +85,7 @@ def modify_existing_chrom_seq(in_arg, index, prev_fasta_path, prev_modifications
8785
prev_modifications.append((position,length_changed))
8886
if position != down_position:
8987
print(f"Removing nucleotides from position {position} - {down_position}")
90-
print(f"Proceeding to insert sequence '{record.description}' from {in_arg.in_fasta[index]} at position {position} on chromosome {in_arg.chrom}")
88+
print(f"Proceeding to insert sequence '{record.description.strip()}' from {in_arg.in_fasta[index]} at position {position} on chromosome {in_arg.chrom}")
9189
## Build the new chromosome sequence with the inserted_seq
9290
## If the chromosome sequence length is in the header, replace it with new length
9391
new_seq = existing_seq_str[:position] + str(record.seq) + existing_seq_str[down_position:]
@@ -218,7 +216,7 @@ def read_fasta(in_arg, index, prev_fasta_path):
218216
except Exception as e:
219217
raise ValueError(f"Error parsing FASTA file: {str(e)}")
220218
print(f"Preparing to create new FASTA file")
221-
print(f"Original Input FASTA: {real_path_fa}")
219+
print(f"Input FASTA: {real_path_fa}")
222220
## Generate index of sequences from ref reference fasta
223221
if prev_fasta_path:
224222
chrom_seqs = index_fasta(prev_fasta_path)
@@ -236,7 +234,7 @@ def check_gff(in_arg, index):
236234
raise FileNotFoundError(f"Error: File {filename_gff} does not exist.")
237235
real_path_gff = os.path.realpath(filename_gff)
238236
print("Preparing to create new annotation file")
239-
print(f"Original Input Annotation: {real_path_gff}")
237+
print(f"Input Annotation: {real_path_gff}")
240238
print() ### print new line
241239

242240
def index_fasta(fasta_path):
@@ -361,8 +359,8 @@ def get_in_gff_lines(in_gff=None, existing_chrom=None, new_chrom=None, sequence_
361359
line_elements = line.split('\t')
362360
chorme_id = existing_chrom if existing_chrom else new_chrom
363361
if line_elements[0] != chorme_id:
364-
print("** Warning: The chromosome name in the GFF file does not match the new chromosome name.")
365-
print(f"Correct the chromosome name {line_elements[0]} to {chorme_id}")
362+
print(f"** Warning: Mismatch detected between chromosome name in input annotation ({line_elements[0]}) and command line parameter ({chorme_id}).")
363+
print(f"Using command line chromosome name: {chorme_id}")
366364
line_elements[0] = chorme_id
367365
if not valid_gff_line(line_elements):
368366
exit()
@@ -443,7 +441,7 @@ def calculate_new_length_for_in_gff(in_gff_lines, position, sequence_length):
443441
## l[3] is start position of fasta in in.gtf and l[4] is end position
444442
seq_id = l[0]
445443
if int(l[4]) - int(l[3]) + 1 != sequence_length:
446-
print(f"** WARNING: Inconsistent length for {seq_id}. Correcting start position to 1 and end position to {sequence_length}.")
444+
print(f"** WARNING: Annotation start and end positions do not match the sequence length in the FASTA input. Adjusting to match the input sequence: start=1, end={sequence_length}.\n→ Affected annotation: {in_gff_lines}")
447445
## Correct start(l[3]) to 1 and end(l[4]) to length of insert fasta
448446
new_gff_line = modify_gff_line(
449447
l, start=1 + position, end=sequence_length + position)
@@ -829,7 +827,7 @@ def get_input_args():
829827
in_args.in_fasta = in_args.in_fasta.split(',')
830828
in_args.in_gff = in_args.in_gff.split(',')
831829
if (len(in_args.in_fasta) != len(in_args.in_gff)):
832-
print("** Error: The number of inserted FASTA files does not match the number of GTF files, or their counts and positions do not align.")
830+
print("** Error: The number of inserted FASTA files does not match the number of annotation files, or their counts and positions do not align.")
833831
exit()
834832
else:
835833
iterations = len(in_args.in_fasta)
@@ -864,7 +862,7 @@ def get_input_args():
864862
parser.error("** Error: When using --new_chrom, you cannot provide --position, --upstream_fasta, or --downstream_fasta.")
865863
exit()
866864
## Convert new_chrom from string to list
867-
in_args.new_chrom = in_args.new_chrom.split(',')
865+
in_args.new_chrom = [x.strip() for x in in_args.new_chrom.split(',')]
868866

869867
return in_args, iterations
870868

0 commit comments

Comments
 (0)