1010## Importing gzip or pgzip module for file compression
1111print ("------------------------------------------" )
1212print (f"Compression Library Use:" )
13- print ("------------------------------------------" )
1413try :
1514 import pgzip as gzip_module
1615 print (f"Using pgzip for gzip operations." )
@@ -25,8 +24,7 @@ def main():
2524
2625 ## Print reference file paths at start of process
2726 print ("------------------------------------------" )
28- print (f"Path of Reference Files:" )
29- print ("------------------------------------------" )
27+ print (f"Path to Reference Files:" )
3028 print (f"Reference FASTA: { os .path .realpath (in_arg .ref_fasta )} " )
3129 print (f"Reference Annotation: { os .path .realpath (in_arg .ref_gff )} " )
3230
@@ -43,24 +41,24 @@ def main():
4341 if hasattr (in_arg , 'chrom' ) and in_arg .chrom is not None :
4442 ## Modify existing chrom seq
4543 print ("-------------------------------------------" )
46- print (f"Begin modification from in { index + 1 } .fa " )
44+ print (f"Begin modification from { in_arg . in_fasta [ index ] } " )
4745 print ("-------------------------------------------" )
4846 new_fasta , annotation_ext , new_gff_path , prev_fasta_path , prev_gff_path = \
4947 modify_existing_chrom_seq (in_arg , index , prev_fasta_path , prev_modifications , \
5048 iterations , prev_gff_path )
5149 else :
5250 ## Add new chrom seq
5351 print ("-------------------------------------------" )
54- print (f"Begin adding a new chromosome from in { index + 1 } .fa " )
52+ print (f"Begin adding a new chromosome from { in_arg . in_fasta [ index ] } " )
5553 print ("-------------------------------------------" )
5654 new_fasta , annotation_ext , new_gff_path , prev_fasta_path , prev_gff_path = \
5755 add_new_chrom_seq (in_arg , index , prev_fasta_path , prev_gff_path , iterations )
5856
5957 print ("------------------------------------------" )
6058 print (f"Reform Complete" )
61- print ("------------------------------------------" )
6259 print (f"New .fa file created: { os .path .realpath (new_fasta )} " )
6360 print (f"New { annotation_ext } file created: { os .path .realpath (new_gff_path )} " )
61+ print ("------------------------------------------" )
6462
6563def modify_existing_chrom_seq (in_arg , index , prev_fasta_path , prev_modifications , iterations , prev_gff_path ):
6664 """
@@ -87,7 +85,7 @@ def modify_existing_chrom_seq(in_arg, index, prev_fasta_path, prev_modifications
8785 prev_modifications .append ((position ,length_changed ))
8886 if position != down_position :
8987 print (f"Removing nucleotides from position { position } - { down_position } " )
90- print (f"Proceeding to insert sequence '{ record .description } ' from { in_arg .in_fasta [index ]} at position { position } on chromosome { in_arg .chrom } " )
88+ print (f"Proceeding to insert sequence '{ record .description . strip () } ' from { in_arg .in_fasta [index ]} at position { position } on chromosome { in_arg .chrom } " )
9189 ## Build the new chromosome sequence with the inserted_seq
9290 ## If the chromosome sequence length is in the header, replace it with new length
9391 new_seq = existing_seq_str [:position ] + str (record .seq ) + existing_seq_str [down_position :]
@@ -218,7 +216,7 @@ def read_fasta(in_arg, index, prev_fasta_path):
218216 except Exception as e :
219217 raise ValueError (f"Error parsing FASTA file: { str (e )} " )
220218 print (f"Preparing to create new FASTA file" )
221- print (f"Original Input FASTA: { real_path_fa } " )
219+ print (f"Input FASTA: { real_path_fa } " )
222220 ## Generate index of sequences from ref reference fasta
223221 if prev_fasta_path :
224222 chrom_seqs = index_fasta (prev_fasta_path )
@@ -236,7 +234,7 @@ def check_gff(in_arg, index):
236234 raise FileNotFoundError (f"Error: File { filename_gff } does not exist." )
237235 real_path_gff = os .path .realpath (filename_gff )
238236 print ("Preparing to create new annotation file" )
239- print (f"Original Input Annotation: { real_path_gff } " )
237+ print (f"Input Annotation: { real_path_gff } " )
240238 print () ### print new line
241239
242240def index_fasta (fasta_path ):
@@ -361,8 +359,8 @@ def get_in_gff_lines(in_gff=None, existing_chrom=None, new_chrom=None, sequence_
361359 line_elements = line .split ('\t ' )
362360 chorme_id = existing_chrom if existing_chrom else new_chrom
363361 if line_elements [0 ] != chorme_id :
364- print ("** Warning: The chromosome name in the GFF file does not match the new chromosome name ." )
365- print (f"Correct the chromosome name { line_elements [ 0 ] } to { chorme_id } " )
362+ print (f "** Warning: Mismatch detected between chromosome name in input annotation ( { line_elements [ 0 ] } ) and command line parameter ( { chorme_id } ) ." )
363+ print (f"Using command line chromosome name: { chorme_id } " )
366364 line_elements [0 ] = chorme_id
367365 if not valid_gff_line (line_elements ):
368366 exit ()
@@ -443,7 +441,7 @@ def calculate_new_length_for_in_gff(in_gff_lines, position, sequence_length):
443441 ## l[3] is start position of fasta in in.gtf and l[4] is end position
444442 seq_id = l [0 ]
445443 if int (l [4 ]) - int (l [3 ]) + 1 != sequence_length :
446- print (f"** WARNING: Inconsistent length for { seq_id } . Correcting start position to 1 and end position to { sequence_length } ." )
444+ print (f"** WARNING: Annotation start and end positions do not match the sequence length in the FASTA input. Adjusting to match the input sequence: start=1, end= { sequence_length } .\n → Affected annotation: { in_gff_lines } " )
447445 ## Correct start(l[3]) to 1 and end(l[4]) to length of insert fasta
448446 new_gff_line = modify_gff_line (
449447 l , start = 1 + position , end = sequence_length + position )
@@ -829,7 +827,7 @@ def get_input_args():
829827 in_args .in_fasta = in_args .in_fasta .split (',' )
830828 in_args .in_gff = in_args .in_gff .split (',' )
831829 if (len (in_args .in_fasta ) != len (in_args .in_gff )):
832- print ("** Error: The number of inserted FASTA files does not match the number of GTF files, or their counts and positions do not align." )
830+ print ("** Error: The number of inserted FASTA files does not match the number of annotation files, or their counts and positions do not align." )
833831 exit ()
834832 else :
835833 iterations = len (in_args .in_fasta )
@@ -864,7 +862,7 @@ def get_input_args():
864862 parser .error ("** Error: When using --new_chrom, you cannot provide --position, --upstream_fasta, or --downstream_fasta." )
865863 exit ()
866864 ## Convert new_chrom from string to list
867- in_args .new_chrom = in_args .new_chrom .split (',' )
865+ in_args .new_chrom = [ x . strip () for x in in_args .new_chrom .split (',' )]
868866
869867 return in_args , iterations
870868
0 commit comments